]> git.proxmox.com Git - qemu-server.git/blob - PVE/QemuServer.pm
940cdacd1e3e737defeb9b377ba25db69b165afa
[qemu-server.git] / PVE / QemuServer.pm
1 package PVE::QemuServer;
2
3 use strict;
4 use warnings;
5
6 use Cwd 'abs_path';
7 use Digest::SHA;
8 use Fcntl ':flock';
9 use Fcntl;
10 use File::Basename;
11 use File::Copy qw(copy);
12 use File::Path;
13 use File::stat;
14 use Getopt::Long;
15 use IO::Dir;
16 use IO::File;
17 use IO::Handle;
18 use IO::Select;
19 use IO::Socket::UNIX;
20 use IPC::Open3;
21 use JSON;
22 use List::Util qw(first);
23 use MIME::Base64;
24 use POSIX;
25 use Storable qw(dclone);
26 use Time::HiRes qw(gettimeofday usleep);
27 use URI::Escape;
28 use UUID;
29
30 use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file);
31 use PVE::CGroup;
32 use PVE::CpuSet;
33 use PVE::DataCenterConfig;
34 use PVE::Exception qw(raise raise_param_exc);
35 use PVE::Format qw(render_duration render_bytes);
36 use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
37 use PVE::Mapping::PCI;
38 use PVE::Mapping::USB;
39 use PVE::INotify;
40 use PVE::JSONSchema qw(get_standard_option parse_property_string);
41 use PVE::ProcFSTools;
42 use PVE::PBSClient;
43 use PVE::RESTEnvironment qw(log_warn);
44 use PVE::RPCEnvironment;
45 use PVE::Storage;
46 use PVE::SysFSTools;
47 use PVE::Systemd;
48 use PVE::Tools qw(run_command file_read_firstline file_get_contents dir_glob_foreach get_host_arch $IPV6RE);
49
50 use PVE::QMPClient;
51 use PVE::QemuConfig;
52 use PVE::QemuServer::Helpers qw(min_version config_aware_timeout windows_version);
53 use PVE::QemuServer::Cloudinit;
54 use PVE::QemuServer::CGroup;
55 use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options);
56 use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom drive_is_read_only parse_drive print_drive);
57 use PVE::QemuServer::Machine;
58 use PVE::QemuServer::Memory;
59 use PVE::QemuServer::Monitor qw(mon_cmd);
60 use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
61 use PVE::QemuServer::USB;
62
# SDN support is optional: the flag is 1 when PVE::Network::SDN::Zones could be
# loaded and stays undef when the 'require' dies.
my $have_sdn = eval {
    require PVE::Network::SDN::Zones;
    1;
};
68
my $EDK2_FW_BASE = '/usr/share/pve-edk2-firmware/';

# Firmware lookup table: architecture => variant => [ *_CODE* image, *_VARS* image ].
my $OVMF = do {
    # Turn file names into paths below $EDK2_FW_BASE. The extra '/' separator is kept
    # on purpose so the resulting strings stay exactly as they always were.
    my $images = sub { return [ map { "$EDK2_FW_BASE/$_" } @_ ]; };

    +{
        x86_64 => {
            '4m-no-smm' => $images->('OVMF_CODE_4M.fd', 'OVMF_VARS_4M.fd'),
            '4m-no-smm-ms' => $images->('OVMF_CODE_4M.fd', 'OVMF_VARS_4M.ms.fd'),
            '4m' => $images->('OVMF_CODE_4M.secboot.fd', 'OVMF_VARS_4M.fd'),
            '4m-ms' => $images->('OVMF_CODE_4M.secboot.fd', 'OVMF_VARS_4M.ms.fd'),
            default => $images->('OVMF_CODE.fd', 'OVMF_VARS.fd'),
        },
        aarch64 => {
            default => $images->('AAVMF_CODE.fd', 'AAVMF_VARS.fd'),
        },
    };
};
100
# Host CPU information, read once when this module is loaded.
my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
102
# Note about locking: we use flock on the config file to protect against concurrent actions.
# Additionally, we have a 'lock' setting in the config file. This can be set to 'migrate',
# 'backup', 'snapshot' or 'rollback'. Most actions are not allowed when such a lock is set.
# But you can ignore this kind of lock with the --skiplock flag.
107
# Hook the VM config parser/writer pair into the cluster file system for '/qemu-server/'.
cfs_register_file('/qemu-server/', \&parse_vm_config, \&write_vm_config);
113
# Standard option: location used by commands that save/restore VM state.
PVE::JSONSchema::register_standard_option('pve-qm-stateuri', {
    type => 'string',
    optional => 1,
    maxLength => 128,
    description => "Some command save/restore state from this location.",
});

# Standard option: QEMU machine type string (pc/i440fx, q35 and virt families).
PVE::JSONSchema::register_standard_option('pve-qemu-machine', {
    type => 'string',
    optional => 1,
    maxLength => 40,
    pattern => '(pc|pc(-i440fx)?-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|q35|pc-q35-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|virt(?:-\d+(\.\d+)+)?(\+pve\d+)?)',
    description => "Specifies the QEMU machine type.",
});
128
# FIXME: remove in favor of just using the INotify one, it's cached there exactly the same way
my $nodename_cache;

# Return the local node name; PVE::INotify is only asked while the cache is still undefined.
sub nodename {
    return $nodename_cache //= PVE::INotify::nodename();
}
135
# Property-string format describing a virtual watchdog; registered as 'pve-qm-watchdog'.
my $watchdog_fmt = {
    model => {
        type => 'string',
        description => "Watchdog type to emulate.",
        enum => ['i6300esb', 'ib700'],
        default => 'i6300esb',
        default_key => 1,
        optional => 1,
    },
    action => {
        type => 'string',
        description => "The action to perform if after activation the guest fails to poll the watchdog in time.",
        enum => ['reset', 'shutdown', 'poweroff', 'pause', 'debug', 'none'],
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-qm-watchdog', $watchdog_fmt);
153
# Property-string format of the 'agent' option (QEMU Guest Agent settings).
my $agent_fmt = {
    enabled => {
        type => 'boolean',
        description => "Enable/disable communication with a QEMU Guest Agent (QGA) running in the VM.",
        default_key => 1,
        default => 0,
    },
    fstrim_cloned_disks => {
        type => 'boolean',
        description => "Run fstrim after moving a disk or migrating the VM.",
        default => 0,
        optional => 1,
    },
    'freeze-fs-on-backup' => {
        type => 'boolean',
        description => "Freeze/thaw guest filesystems on backup for consistency.",
        default => 1,
        optional => 1,
    },
    type => {
        type => 'string',
        description => "Select the agent type",
        enum => ['virtio', 'isa'],
        default => 'virtio',
        optional => 1,
    },
};
181
# Property-string format of the 'vga' option: display type plus optional video memory.
my $vga_fmt = {
    type => {
        type => 'string',
        description => "Select the VGA type.",
        enum => [
            'cirrus', 'qxl', 'qxl2', 'qxl3', 'qxl4', 'none',
            'serial0', 'serial1', 'serial2', 'serial3',
            'std', 'virtio', 'virtio-gl', 'vmware',
        ],
        default => 'std',
        default_key => 1,
        optional => 1,
    },
    memory => {
        type => 'integer',
        description => "Sets the VGA memory (in MiB). Has no effect with serial display.",
        minimum => 4,
        maximum => 512,
        optional => 1,
    },
};
199
# Property-string format of the 'ivshmem' option (inter-VM shared memory).
my $ivshmem_fmt = {
    size => {
        description => "The size of the file in MB.",
        type => 'integer',
        minimum => 1,
    },
    name => {
        description => "The name of the file. Will be prefixed with 'pve-shm-'. Default is the VMID. Will be deleted when the VM is stopped.",
        format_description => 'string',
        type => 'string',
        pattern => '[a-zA-Z0-9\-]+',
        optional => 1,
    },
};
214
# Property-string format of the 'audio0' option: emulated device and backend driver.
my $audio_fmt = {
    device => {
        description => "Configure an audio device.",
        type => 'string',
        enum => ['ich9-intel-hda', 'intel-hda', 'AC97'],
    },
    driver => {
        description => "Driver backend for the audio device.",
        type => 'string',
        enum => [qw(spice none)],
        default => 'spice',
        optional => 1,
    },
};
229
# Property-string format of the 'spice_enhancements' option.
my $spice_enhancements_fmt = {
    foldersharing => {
        description => "Enable folder sharing via SPICE. Needs Spice-WebDAV daemon installed in the VM.",
        type => 'boolean',
        default => '0',
        optional => 1,
    },
    videostreaming => {
        description => "Enable video streaming. Uses compression for detected video streams.",
        type => 'string',
        enum => [qw(off all filter)],
        default => 'off',
        optional => 1,
    },
};
245
# Property-string format of the 'rng0' option (VirtIO random number generator).
my $rng_fmt = {
    source => {
        type => 'string',
        enum => [qw(/dev/urandom /dev/random /dev/hwrng)],
        default_key => 1,
        description => join(' ',
            "The file on the host to gather entropy from. In most cases '/dev/urandom'",
            "should be preferred over '/dev/random' to avoid entropy-starvation issues on the",
            "host. Using urandom does *not* decrease security in any meaningful way, as it's",
            "still seeded from real entropy, and the bytes provided will most likely be mixed",
            "with real entropy on the guest as well. '/dev/hwrng' can be used to pass through",
            "a hardware RNG from the host.",
        ),
    },
    max_bytes => {
        type => 'integer',
        optional => 1,
        # default is 1 KiB/s, provides enough entropy to the guest to avoid boot-starvation issues
        # (e.g. systemd etc...) while allowing no chance of overwhelming the host, provided we're
        # reading from /dev/urandom
        default => 1024,
        description => join(' ',
            "Maximum bytes of entropy allowed to get injected into the guest every",
            "'period' milliseconds. Prefer a lower value when using '/dev/random' as source. Use",
            "`0` to disable limiting (potentially dangerous!).",
        ),
    },
    period => {
        type => 'integer',
        optional => 1,
        default => 1000,
        description => join(' ',
            "Every 'period' milliseconds the entropy-injection quota is reset, allowing",
            "the guest to retrieve another 'max_bytes' of entropy.",
        ),
    },
};
278
# Property-string format of the 'meta' option (creation time and creating QEMU version).
my $meta_info_fmt = {
    ctime => {
        description => "The guest creation timestamp as UNIX epoch time",
        type => 'integer',
        minimum => 0,
        optional => 1,
    },
    'creation-qemu' => {
        description => "The QEMU (machine) version from the time this VM was created.",
        type => 'string',
        pattern => '\d+(\.\d+)+',
        optional => 1,
    },
};
293
# Schema of the options accepted in a VM configuration: option name => JSONSchema
# property definition (type, limits, default and the descriptions shown to users).
# The description texts are user-facing, so keep spelling and spacing correct.
my $confdesc = {
    onboot => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether a VM will be started during system bootup.",
        default => 0,
    },
    autostart => {
        optional => 1,
        type => 'boolean',
        description => "Automatic restart after crash (currently ignored).",
        default => 0,
    },
    hotplug => {
        optional => 1,
        type => 'string', format => 'pve-hotplug-features',
        description => "Selectively enable hotplug features. This is a comma separated list of"
            ." hotplug features: 'network', 'disk', 'cpu', 'memory', 'usb' and 'cloudinit'. Use '0' to disable"
            ." hotplug completely. Using '1' as value is an alias for the default `network,disk,usb`."
            ." USB hotplugging is possible for guests with machine version >= 7.1 and ostype l26 or"
            ." windows > 7.",
        default => 'network,disk,usb',
    },
    reboot => {
        optional => 1,
        type => 'boolean',
        description => "Allow reboot. If set to '0' the VM exits on reboot.",
        default => 1,
    },
    lock => {
        optional => 1,
        type => 'string',
        description => "Lock/unlock the VM.",
        enum => [qw(backup clone create migrate rollback snapshot snapshot-delete suspending suspended)],
    },
    cpulimit => {
        optional => 1,
        type => 'number',
        description => "Limit of CPU usage.",
        verbose_description => "Limit of CPU usage.\n\nNOTE: If the computer has 2 CPUs, it has"
            ." total of '2' CPU time. Value '0' indicates no CPU limit.",
        minimum => 0,
        maximum => 128,
        default => 0,
    },
    cpuunits => {
        optional => 1,
        type => 'integer',
        description => "CPU weight for a VM, will be clamped to [1, 10000] in cgroup v2.",
        verbose_description => "CPU weight for a VM. Argument is used in the kernel fair scheduler."
            ." The larger the number is, the more CPU time this VM gets. Number is relative to"
            ." weights of all the other running VMs.",
        minimum => 1,
        maximum => 262144,
        default => 'cgroup v1: 1024, cgroup v2: 100',
    },
    memory => {
        optional => 1,
        type => 'integer',
        description => "Amount of RAM for the VM in MiB. This is the maximum available memory when"
            ." you use the balloon device.",
        minimum => 16,
        default => 512,
    },
    balloon => {
        optional => 1,
        type => 'integer',
        description => "Amount of target RAM for the VM in MiB. Using zero disables the balloon driver.",
        minimum => 0,
    },
    shares => {
        optional => 1,
        type => 'integer',
        description => "Amount of memory shares for auto-ballooning. The larger the number is, the"
            ." more memory this VM gets. Number is relative to weights of all other running VMs."
            ." Using zero disables auto-ballooning. Auto-ballooning is done by pvestatd.",
        minimum => 0,
        maximum => 50000,
        default => 1000,
    },
    keyboard => {
        optional => 1,
        type => 'string',
        description => "Keyboard layout for VNC server. This option is generally not required and"
            ." is often better handled from within the guest OS.",
        enum => PVE::Tools::kvmkeymaplist(),
        default => undef,
    },
    name => {
        optional => 1,
        type => 'string', format => 'dns-name',
        description => "Set a name for the VM. Only used on the configuration web interface.",
    },
    scsihw => {
        optional => 1,
        type => 'string',
        description => "SCSI controller model",
        enum => [qw(lsi lsi53c810 virtio-scsi-pci virtio-scsi-single megasas pvscsi)],
        default => 'lsi',
    },
    description => {
        optional => 1,
        type => 'string',
        description => "Description for the VM. Shown in the web-interface VM's summary."
            ." This is saved as comment inside the configuration file.",
        maxLength => 1024 * 8,
    },
    ostype => {
        optional => 1,
        type => 'string',
        enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 win10 win11 l24 l26 solaris)],
        description => "Specify guest operating system.",
        verbose_description => <<EODESC,
Specify guest operating system. This is used to enable special
optimization/features for specific operating systems:

[horizontal]
other;; unspecified OS
wxp;; Microsoft Windows XP
w2k;; Microsoft Windows 2000
w2k3;; Microsoft Windows 2003
w2k8;; Microsoft Windows 2008
wvista;; Microsoft Windows Vista
win7;; Microsoft Windows 7
win8;; Microsoft Windows 8/2012/2012r2
win10;; Microsoft Windows 10/2016/2019
win11;; Microsoft Windows 11/2022
l24;; Linux 2.4 Kernel
l26;; Linux 2.6 - 6.X Kernel
solaris;; Solaris/OpenSolaris/OpenIndiana kernel
EODESC
    },
    boot => {
        optional => 1,
        type => 'string', format => 'pve-qm-boot',
        description => "Specify guest boot order. Use the 'order=' sub-property as usage with no"
            ." key or 'legacy=' is deprecated.",
    },
    bootdisk => {
        optional => 1,
        type => 'string', format => 'pve-qm-bootdisk',
        description => "Enable booting from specified disk. Deprecated: Use 'boot: order=foo;bar' instead.",
        pattern => '(ide|sata|scsi|virtio)\d+',
    },
    smp => {
        optional => 1,
        type => 'integer',
        description => "The number of CPUs. Please use option -sockets instead.",
        minimum => 1,
        default => 1,
    },
    sockets => {
        optional => 1,
        type => 'integer',
        description => "The number of CPU sockets.",
        minimum => 1,
        default => 1,
    },
    cores => {
        optional => 1,
        type => 'integer',
        description => "The number of cores per socket.",
        minimum => 1,
        default => 1,
    },
    numa => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable NUMA.",
        default => 0,
    },
    hugepages => {
        optional => 1,
        type => 'string',
        description => "Enable/disable hugepages memory.",
        enum => [qw(any 2 1024)],
    },
    keephugepages => {
        optional => 1,
        type => 'boolean',
        default => 0,
        description => "Use together with hugepages. If enabled, hugepages will not be deleted"
            ." after VM shutdown and can be used for subsequent starts.",
    },
    vcpus => {
        optional => 1,
        type => 'integer',
        description => "Number of hotplugged vcpus.",
        minimum => 1,
        default => 0,
    },
    acpi => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable ACPI.",
        default => 1,
    },
    agent => {
        optional => 1,
        description => "Enable/disable communication with the QEMU Guest Agent and its properties.",
        type => 'string',
        format => $agent_fmt,
    },
    kvm => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable KVM hardware virtualization.",
        default => 1,
    },
    tdf => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable time drift fix.",
        default => 0,
    },
    localtime => {
        optional => 1,
        type => 'boolean',
        description => "Set the real time clock (RTC) to local time. This is enabled by default if"
            ." the `ostype` indicates a Microsoft Windows OS.",
    },
    freeze => {
        optional => 1,
        type => 'boolean',
        description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
    },
    vga => {
        optional => 1,
        type => 'string', format => $vga_fmt,
        description => "Configure the VGA hardware.",
        verbose_description => "Configure the VGA Hardware. If you want to use high resolution"
            ." modes (>= 1280x1024x16) you may need to increase the vga memory option. Since QEMU"
            ." 2.9 the default VGA display type is 'std' for all OS types besides some Windows"
            ." versions (XP and older) which use 'cirrus'. The 'qxl' option enables the SPICE"
            ." display server. For win* OS you can select how many independent displays you want,"
            ." Linux guests can add displays themselves.\nYou can also run without any graphic card,"
            ." using a serial device as terminal.",
    },
    watchdog => {
        optional => 1,
        type => 'string', format => 'pve-qm-watchdog',
        description => "Create a virtual hardware watchdog device.",
        verbose_description => "Create a virtual hardware watchdog device. Once enabled (by a guest"
            ." action), the watchdog must be periodically polled by an agent inside the guest or"
            ." else the watchdog will reset the guest (or execute the respective action specified)",
    },
    startdate => {
        optional => 1,
        type => 'string',
        typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
        description => "Set the initial date of the real time clock. Valid format for date are:"
            ." 'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
        pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
        default => 'now',
    },
    startup => get_standard_option('pve-startup-order'),
    template => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable Template.",
        default => 0,
    },
    args => {
        optional => 1,
        type => 'string',
        description => "Arbitrary arguments passed to kvm.",
        verbose_description => <<EODESCR,
Arbitrary arguments passed to kvm, for example:

args: -no-reboot -smbios 'type=0,vendor=FOO'

NOTE: this option is for experts only.
EODESCR
    },
    tablet => {
        optional => 1,
        type => 'boolean',
        default => 1,
        description => "Enable/disable the USB tablet device.",
        verbose_description => "Enable/disable the USB tablet device. This device is usually needed"
            ." to allow absolute mouse positioning with VNC. Else the mouse runs out of sync with"
            ." normal VNC clients. If you're running lots of console-only guests on one host, you"
            ." may consider disabling this to save some context switches. This is turned off by"
            ." default if you use spice (`qm set <vmid> --vga qxl`).",
    },
    migrate_speed => {
        optional => 1,
        type => 'integer',
        description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
        minimum => 0,
        default => 0,
    },
    migrate_downtime => {
        optional => 1,
        type => 'number',
        description => "Set maximum tolerated downtime (in seconds) for migrations.",
        minimum => 0,
        default => 0.1,
    },
    cdrom => {
        optional => 1,
        type => 'string', format => 'pve-qm-ide',
        typetext => '<volume>',
        description => "This is an alias for option -ide2",
    },
    cpu => {
        optional => 1,
        description => "Emulated CPU type.",
        type => 'string',
        format => 'pve-vm-cpu-conf',
    },
    parent => get_standard_option('pve-snapshot-name', {
        optional => 1,
        description => "Parent snapshot name. This is used internally, and should not be modified.",
    }),
    snaptime => {
        optional => 1,
        description => "Timestamp for snapshots.",
        type => 'integer',
        minimum => 0,
    },
    vmstate => {
        optional => 1,
        type => 'string', format => 'pve-volume-id',
        description => "Reference to a volume which stores the VM state. This is used internally"
            ." for snapshots.",
    },
    vmstatestorage => get_standard_option('pve-storage-id', {
        description => "Default storage for VM state volumes/files.",
        optional => 1,
    }),
    runningmachine => get_standard_option('pve-qemu-machine', {
        description => "Specifies the QEMU machine type of the running vm. This is used internally"
            ." for snapshots.",
    }),
    runningcpu => {
        description => "Specifies the QEMU '-cpu' parameter of the running vm. This is used"
            ." internally for snapshots.",
        optional => 1,
        type => 'string',
        pattern => $PVE::QemuServer::CPUConfig::qemu_cmdline_cpu_re,
        format_description => 'QEMU -cpu parameter'
    },
    machine => get_standard_option('pve-qemu-machine'),
    arch => {
        description => "Virtual processor architecture. Defaults to the host.",
        optional => 1,
        type => 'string',
        enum => [qw(x86_64 aarch64)],
    },
    smbios1 => {
        description => "Specify SMBIOS type 1 fields.",
        type => 'string', format => 'pve-qm-smbios1',
        maxLength => 512,
        optional => 1,
    },
    protection => {
        optional => 1,
        type => 'boolean',
        description => "Sets the protection flag of the VM. This will disable the remove VM and"
            ." remove disk operations.",
        default => 0,
    },
    bios => {
        optional => 1,
        type => 'string',
        enum => [ qw(seabios ovmf) ],
        description => "Select BIOS implementation.",
        default => 'seabios',
    },
    vmgenid => {
        type => 'string',
        pattern => '(?:[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}|[01])',
        format_description => 'UUID',
        description => "Set VM Generation ID. Use '1' to autogenerate on create or update, pass '0'"
            ." to disable explicitly.",
        verbose_description => "The VM generation ID (vmgenid) device exposes a 128-bit integer"
            ." value identifier to the guest OS. This allows to notify the guest operating system"
            ." when the virtual machine is executed with a different configuration (e.g. snapshot"
            ." execution or creation from a template). The guest operating system notices the"
            ." change, and is then able to react as appropriate by marking its copies of"
            ." distributed databases as dirty, re-initializing its random number generator, etc.\n"
            ."Note that auto-creation only works when done through API/CLI create or update methods"
            .", but not when manually editing the config file.",
        default => "1 (autogenerated)",
        optional => 1,
    },
    hookscript => {
        type => 'string',
        format => 'pve-volume-id',
        optional => 1,
        description => "Script that will be executed during various steps in the vms lifetime.",
    },
    ivshmem => {
        type => 'string',
        format => $ivshmem_fmt,
        description => "Inter-VM shared memory. Useful for direct communication between VMs, or to"
            ." the host.",
        optional => 1,
    },
    audio0 => {
        type => 'string',
        format => $audio_fmt,
        description => "Configure an audio device, useful in combination with QXL/Spice.",
        optional => 1
    },
    spice_enhancements => {
        type => 'string',
        format => $spice_enhancements_fmt,
        description => "Configure additional enhancements for SPICE.",
        optional => 1
    },
    tags => {
        type => 'string', format => 'pve-tag-list',
        description => 'Tags of the VM. This is only meta information.',
        optional => 1,
    },
    rng0 => {
        type => 'string',
        format => $rng_fmt,
        description => "Configure a VirtIO-based Random Number Generator.",
        optional => 1,
    },
    meta => {
        type => 'string',
        format => $meta_info_fmt,
        description => "Some (read-only) meta-information about this guest.",
        optional => 1,
    },
    affinity => {
        type => 'string', format => 'pve-cpuset',
        description => "List of host cores used to execute guest processes, for example: 0,5,8-11",
        optional => 1,
    },
};
729
# Property-string format of the cloud-init 'cicustom' option: each key names a volume
# holding a custom file that is passed to the VM instead of generated data.
my $cicustom_fmt = {
    meta => {
        type => 'string',
        optional => 1,
        # Both parts use single quotes: mixing quote styles here used to put a literal
        # '"', a line break and '."' into the description text.
        description => 'Specify a custom file containing all meta data passed to the VM via'
            .' cloud-init. This is provider specific meaning configdrive2 and nocloud differ.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    network => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all network data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    user => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all user data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    vendor => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all vendor data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
};
# Register the format under the name the 'cicustom' option refers to in its `format`.
PVE::JSONSchema::register_format('pve-qm-cicustom', $cicustom_fmt);
762
# Schema of the cloud-init related VM options: option name => JSONSchema property
# definition. The description texts are user-facing.
my $confdesc_cloudinit = {
    citype => {
        optional => 1,
        type => 'string',
        description => 'Specifies the cloud-init configuration format. The default depends on the'
            .' configured operating system type (`ostype`). We use the `nocloud` format for Linux,'
            .' and `configdrive2` for windows.',
        enum => ['configdrive2', 'nocloud', 'opennebula'],
    },
    ciuser => {
        optional => 1,
        type => 'string',
        description => "cloud-init: User name to change ssh keys and password for instead of the"
            ." image's configured default user.",
    },
    cipassword => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Password to assign the user. Using this is generally not'
            .' recommended. Use ssh keys instead. Also note that older cloud-init versions do not'
            .' support hashed passwords.',
    },
    ciupgrade => {
        optional => 1,
        type => 'boolean',
        description => 'cloud-init: do an automatic package upgrade after the first boot.'
    },
    cicustom => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Specify custom files to replace the automatically generated'
            .' ones at start.',
        format => 'pve-qm-cicustom',
    },
    searchdomain => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Sets DNS search domains for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    nameserver => {
        optional => 1,
        type => 'string', format => 'address-list',
        description => 'cloud-init: Sets DNS server IP address for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    sshkeys => {
        optional => 1,
        type => 'string',
        format => 'urlencoded',
        description => "cloud-init: Setup public SSH keys (one key per line, OpenSSH format).",
    },
};
818
819 # what about other qemu settings ?
820 #cpu => 'string',
821 #machine => 'string',
822 #fda => 'file',
823 #fdb => 'file',
824 #mtdblock => 'file',
825 #sd => 'file',
826 #pflash => 'file',
827 #snapshot => 'bool',
828 #bootp => 'file',
829 ##tftp => 'dir',
830 ##smb => 'dir',
831 #kernel => 'file',
832 #append => 'string',
833 #initrd => 'file',
834 ##soundhw => 'string',
835
# Publish every option defined in $confdesc so far as standard option "pve-qm-<name>".
for my $opt (keys %$confdesc) {
    PVE::JSONSchema::register_standard_option("pve-qm-$opt", $confdesc->{$opt});
}
839
# Upper bounds for indexed options. $MAX_NETS and $MAX_NUMA are the loop limits used in
# this file to generate the net<N>/ipconfig<N> and numa<N> schema entries.
# NOTE(review): the serial/parallel limits are not used in the part of the file shown here.
my $MAX_NETS = 32;
my $MAX_SERIAL_PORTS = 4;
my $MAX_PARALLEL_PORTS = 3;
my $MAX_NUMA = 8;
844
# Property-string format of one NUMA node definition; registered as 'pve-qm-numanode'.
my $numa_fmt = {
    cpus => {
        description => "CPUs accessing this NUMA node.",
        format_description => "id[-id];...",
        type => 'string',
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
    },
    memory => {
        description => "Amount of memory this NUMA node provides.",
        type => 'number',
        optional => 1,
    },
    hostnodes => {
        description => "Host NUMA nodes to use.",
        format_description => "id[-id];...",
        type => 'string',
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        optional => 1,
    },
    policy => {
        description => "NUMA allocation policy.",
        type => 'string',
        enum => ['preferred', 'bind', 'interleave'],
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-qm-numanode', $numa_fmt);
# Schema entry shared by all numa<N> options; also published as a standard option.
my $numadesc = {
    type => 'string',
    format => $numa_fmt,
    optional => 1,
    description => "NUMA topology.",
};
PVE::JSONSchema::register_standard_option("pve-qm-numanode", $numadesc);

# numa0 .. numa<$MAX_NUMA - 1> all point at the very same schema hash.
for my $idx (0 .. $MAX_NUMA - 1) {
    $confdesc->{"numa$idx"} = $numadesc;
}
882
# Supported NIC models, plus a space separated, sorted rendering of the same list.
my $nic_model_list = [qw(
    e1000
    e1000-82540em
    e1000-82544gc
    e1000-82545em
    e1000e
    i82551
    i82557b
    i82559er
    ne2k_isa
    ne2k_pci
    pcnet
    rtl8139
    virtio
    vmxnet3
)];
my $nic_model_list_txt = join(' ', sort @{$nic_model_list});
900
901 my $net_fmt_bridge_descr = <<__EOD__;
902 Bridge to attach the network device to. The Proxmox VE standard bridge
903 is called 'vmbr0'.
904
905 If you do not specify a bridge, we create a kvm user (NATed) network
906 device, which provides DHCP and DNS services. The following addresses
907 are used:
908
909 10.0.2.2 Gateway
910 10.0.2.3 DNS Server
911 10.0.2.4 SMB Server
912
913 The DHCP server assign addresses to the guest starting from 10.0.2.15.
914 __EOD__
915
# Property-string format of the net<N> options. Besides the named properties, every NIC
# model name is accepted as a key alias: `<model>=<mac>` sets 'model' and 'macaddr'.
my $net_fmt = {
    macaddr => get_standard_option('mac-addr', {
        description => "MAC address. That address must be unique within your network. This is"
            ." automatically generated if not specified.",
    }),
    model => {
        type => 'string',
        description => "Network Card Model. The 'virtio' model provides the best performance with"
            ." very low CPU overhead. If your guest does not support this driver, it is usually"
            ." best to use 'e1000'.",
        enum => $nic_model_list,
        default_key => 1,
    },
    # one alias entry per supported NIC model
    (map { $_ => { keyAlias => 'model', alias => 'macaddr' }} @$nic_model_list),
    bridge => get_standard_option('pve-bridge-id', {
        description => $net_fmt_bridge_descr,
        optional => 1,
    }),
    queues => {
        type => 'integer',
        minimum => 0, maximum => 64,
        description => 'Number of packet queues to be used on the device.',
        optional => 1,
    },
    rate => {
        type => 'number',
        minimum => 0,
        description => "Rate limit in mbps (megabytes per second) as floating point number.",
        optional => 1,
    },
    tag => {
        type => 'integer',
        minimum => 1, maximum => 4094,
        description => 'VLAN tag to apply to packets on this interface.',
        optional => 1,
    },
    trunks => {
        type => 'string',
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        description => 'VLAN trunks to pass through this interface.',
        format_description => 'vlanid[;vlanid...]',
        optional => 1,
    },
    firewall => {
        type => 'boolean',
        description => 'Whether this interface should be protected by the firewall.',
        optional => 1,
    },
    link_down => {
        type => 'boolean',
        description => 'Whether this interface should be disconnected (like pulling the plug).',
        optional => 1,
    },
    mtu => {
        type => 'integer',
        minimum => 1, maximum => 65520,
        description => "Force MTU, for VirtIO only. Set to '1' to use the bridge MTU",
        optional => 1,
    },
};
976
# Schema entry shared by all net<N> options; also published as standard option 'pve-qm-net'.
my $netdesc = {
    type => 'string',
    format => $net_fmt,
    optional => 1,
    description => "Specify network devices.",
};

PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
984
# Property-string format of the cloud-init ipconfig<N> options: an address and an optional
# gateway per IP family. A gateway requires the address of the same family to be set.
my $ipconfig_fmt = {
    ip => {
        description => 'IPv4 address in CIDR format.',
        format_description => 'IPv4Format/CIDR',
        type => 'string',
        format => 'pve-ipv4-config',
        default => 'dhcp',
        optional => 1,
    },
    gw => {
        description => 'Default gateway for IPv4 traffic.',
        format_description => 'GatewayIPv4',
        type => 'string',
        format => 'ipv4',
        requires => 'ip',
        optional => 1,
    },
    ip6 => {
        description => 'IPv6 address in CIDR format.',
        format_description => 'IPv6Format/CIDR',
        type => 'string',
        format => 'pve-ipv6-config',
        default => 'dhcp',
        optional => 1,
    },
    gw6 => {
        description => 'Default gateway for IPv6 traffic.',
        format_description => 'GatewayIPv6',
        type => 'string',
        format => 'ipv6',
        requires => 'ip6',
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-qm-ipconfig', $ipconfig_fmt);
# Schema entry for a cloud-init 'ipconfig<N>' option: an optional property
# string validated against the 'pve-qm-ipconfig' format.
my $ipconfigdesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-ipconfig',
    description => <<'EODESCR',
cloud-init: Specify IP addresses and gateways for the corresponding interface.

IP addresses use CIDR notation, gateways are optional but need an IP of the same type specified.

The special string 'dhcp' can be used for IP addresses to use DHCP, in which case no explicit
gateway should be provided.
For IPv6 the special string 'auto' can be used to use stateless autoconfiguration. This requires
cloud-init 19.4 or newer.

If cloud-init is enabled and neither an IPv4 nor an IPv6 address is specified, it defaults to using
dhcp on IPv4.
EODESCR
};
# The standard option 'pve-qm-ipconfig' must describe the IP configuration,
# not the NIC itself, so register $ipconfigdesc here (not the NIC description).
PVE::JSONSchema::register_standard_option("pve-qm-ipconfig", $ipconfigdesc);
1038
# One 'net<N>' option and one cloud-init 'ipconfig<N>' option per NIC slot.
for my $idx (0 .. $MAX_NETS - 1) {
    $confdesc->{"net$idx"} = $netdesc;
    $confdesc_cloudinit->{"ipconfig$idx"} = $ipconfigdesc;
}

# Copy every cloud-init option into the main VM config schema.
$confdesc->{$_} = $confdesc_cloudinit->{$_} for keys %$confdesc_cloudinit;
1047
PVE::JSONSchema::register_format('pve-cpuset', \&pve_verify_cpuset);

# Verifier for the 'pve-cpuset' format.
#
# Parses $set_text with PVE::CpuSet::parse_cpuset() and returns the
# short_string() form of the resulting set. If parsing fails, dies with a
# generic message, or returns nothing when $noerr is set.
sub pve_verify_cpuset {
    my ($set_text, $noerr) = @_;

    my (undef, $cpu_members) = eval { PVE::CpuSet::parse_cpuset($set_text) };

    if ($@) {
        die "unable to parse cpuset option\n" if !$noerr;
        return;
    }

    my $cpuset = PVE::CpuSet->new($cpu_members);
    return $cpuset->short_string();
}
1061
PVE::JSONSchema::register_format('pve-volume-id-or-qm-path', \&verify_volume_id_or_qm_path);

# Verifier for 'pve-volume-id-or-qm-path': accepts the keywords 'none' and
# 'cdrom' as-is and hands everything else to
# verify_volume_id_or_absolute_path().
sub verify_volume_id_or_qm_path {
    my ($volid, $noerr) = @_;

    for my $keyword (qw(none cdrom)) {
        return $volid if $volid eq $keyword;
    }

    return verify_volume_id_or_absolute_path($volid, $noerr);
}
1070
PVE::JSONSchema::register_format('pve-volume-id-or-absolute-path', \&verify_volume_id_or_absolute_path);

# Verifier for 'pve-volume-id-or-absolute-path'.
#
# A value starting with '/' is returned unchanged. Anything else must pass the
# 'pve-volume-id' format check, whose result is returned. On failure the
# original error is re-thrown, or nothing is returned when $noerr is set.
sub verify_volume_id_or_absolute_path {
    my ($volid, $noerr) = @_;

    return $volid if $volid =~ m|^/|;

    my $checked = eval { PVE::JSONSchema::check_format('pve-volume-id', $volid, '') };
    return $checked if !$@;

    die $@ if !$noerr;
    return;
}
1084
# Schema entry for the 'serial<N>' options. The value is either a host device
# path below /dev/ or the literal 'socket'.
my $serialdesc = {
    optional => 1,
    type => 'string',
    pattern => '(/dev/.+|socket)',
    description => "Create a serial device inside the VM (n is 0 to 3)",
    verbose_description => <<EODESCR,
Create a serial device inside the VM (n is 0 to 3), and pass through a
host serial device (i.e. /dev/ttyS0), or create a unix socket on the
host side (use 'qm terminal' to open a terminal connection).

NOTE: If you pass through a host serial device, it is no longer possible to migrate such machines -
use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1101
# Schema entry for the 'parallel<N>' options. The value must name a host
# parallel port (/dev/parport<N>) or a USB printer device (/dev/usb/lp<N>).
my $paralleldesc= {
    optional => 1,
    type => 'string',
    pattern => '/dev/parport\d+|/dev/usb/lp\d+',
    description => "Map host parallel devices (n is 0 to 2).",
    verbose_description => <<EODESCR,
Map host parallel devices (n is 0 to 2).

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1116
# Publish one schema entry per available slot of each device class.
$confdesc->{"parallel$_"} = $paralleldesc for 0 .. $MAX_PARALLEL_PORTS - 1;

$confdesc->{"serial$_"} = $serialdesc for 0 .. $MAX_SERIAL_PORTS - 1;

$confdesc->{"hostpci$_"} = $PVE::QemuServer::PCI::hostpcidesc
    for 0 .. $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES - 1;

# Drive options come with their own, already keyed, description hash.
$confdesc->{$_} = $PVE::QemuServer::Drive::drivedesc_hash->{$_}
    for keys %{$PVE::QemuServer::Drive::drivedesc_hash};

$confdesc->{"usb$_"} = $PVE::QemuServer::USB::usbdesc
    for 0 .. $PVE::QemuServer::USB::MAX_USB_DEVICES - 1;
1136
# Property-string format of the 'boot' option. It knows the deprecated
# 'legacy' letter list (which is also the default key) and the newer 'order'
# device list.
my $boot_fmt = {
    legacy => {
        optional => 1,
        default_key => 1,
        type => 'string',
        description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n)."
            . " Deprecated, use 'order=' instead.",
        pattern => '[acdn]{1,4}',
        format_description => "[acdn]{1,4}",

        # note: this is also the fallback if boot: is not given at all
        default => 'cdn',
    },
    order => {
        optional => 1,
        type => 'string',
        format => 'pve-qm-bootdev-list',
        format_description => "device[;device...]",
        description => <<EODESC,
The guest will attempt to boot from devices in the order they appear here.

Disks, optical drives and passed-through storage USB devices will be directly
booted from, NICs will load PXE, and PCIe devices will either behave like disks
(e.g. NVMe) or load an option ROM (e.g. RAID controller, hardware NIC).

Note that only devices in this list will be marked as bootable and thus loaded
by the guest firmware (BIOS/UEFI). If you require multiple disks for booting
(e.g. software-raid), you need to specify all of them here.

Overrides the deprecated 'legacy=[acdn]*' value when given.
EODESC
    },
};
# Register the format under the name 'pve-qm-boot'.
PVE::JSONSchema::register_format('pve-qm-boot', $boot_fmt);
1171
PVE::JSONSchema::register_format('pve-qm-bootdev', \&verify_bootdev);

# Verifier for a single boot device name.
#
# Accepted are valid drive names, except those starting with 'efidisk' or
# 'tpmstate', and 'net<N>', 'usb<N>' or 'hostpci<N>' keys that exist in
# $confdesc. Returns $dev on success; otherwise dies, or returns nothing when
# $noerr is set.
sub verify_bootdev {
    my ($dev, $noerr) = @_;

    my $is_state_disk = $dev =~ m/^efidisk/ || $dev =~ m/^tpmstate/;
    if (PVE::QemuServer::Drive::is_valid_drivename($dev) && !$is_state_disk) {
        return $dev;
    }

    # non-drive devices must be '<base><index>' and known to the config schema
    for my $base (qw(net usb hostpci)) {
        return $dev if $dev =~ m/^$base\d+$/ && $confdesc->{$dev};
    }

    die "invalid boot device '$dev'\n" if !$noerr;
    return;
}
1193
# Render a list of boot devices as a 'boot' property string ("order=a;b;c").
# An empty list yields the empty string.
sub print_bootorder {
    my ($devs) = @_;

    return "" if scalar(@$devs) == 0;

    my $order = join(';', @$devs);
    return PVE::JSONSchema::print_property_string({ order => $order }, $boot_fmt);
}
1200
# Cached result of the KVM_GET_API_VERSION ioctl (0 = not queried yet).
my $kvm_api_version = 0;

# Return the KVM API version reported by /dev/kvm, querying it only while no
# true value is cached. Returns nothing if /dev/kvm cannot be opened.
sub kvm_version {
    if (!$kvm_api_version) {
        open(my $kvm_fh, '<', '/dev/kvm') or return;

        # 0xae00 => KVM_GET_API_VERSION
        $kvm_api_version = ioctl($kvm_fh, 0xae00, 0);
        close($kvm_fh);
    }

    return $kvm_api_version;
}
1214
# Per-binary caches: detected QEMU version string and the binary's mtime at
# the time of detection.
my $kvm_user_version = {};
my $kvm_mtime = {};

# Return the version reported by "$binary --version" (default: the QEMU binary
# for the host architecture). The result is cached per binary and re-read
# whenever the binary's mtime changes. Yields 'unknown' if no version line
# could be parsed.
sub kvm_user_version {
    my ($binary) = @_;

    $binary //= get_command_for_arch(get_host_arch()); # get the native arch by default
    my $st = stat($binary);

    my $cached_version = $kvm_user_version->{$binary};
    my $cached_mtime = $kvm_mtime->{$binary} // -1;
    if ($cached_version && $cached_mtime == $st->mtime) {
        return $cached_version;
    }

    $kvm_user_version->{$binary} = 'unknown';
    $kvm_mtime->{$binary} = $st->mtime;

    # called for each output line; remembers the version from the banner line
    my $parse_version_line = sub {
        my ($line) = @_;
        if ($line =~ m/^QEMU( PC)? emulator version (\d+\.\d+(\.\d+)?)(\.\d+)?[,\s]/) {
            $kvm_user_version->{$binary} = $2;
        }
    };

    eval { run_command([$binary, '--version'], outfunc => $parse_version_line) };
    warn $@ if $@;

    return $kvm_user_version->{$binary};
}
# Thin wrapper around PVE::QemuServer::Machine::extract_version() that falls
# back to the installed QEMU binary's version when $version is undefined.
my sub extract_version {
    my ($machine_type, $version) = @_;

    $version //= kvm_user_version();
    return PVE::QemuServer::Machine::extract_version($machine_type, $version);
}
1249
# True if the vhost-net character device exists on this host.
sub kernel_has_vhost_net {
    my $vhost_dev = '/dev/vhost-net';
    return -c $vhost_dev;
}
1253
# True if $key is a known option of the VM config schema ($confdesc).
sub option_exists {
    my ($key) = @_;

    my $schema = $confdesc->{$key};
    return defined($schema);
}
1258
# Cached path of the host CD-ROM device ('' once detection failed).
my $cdrom_path;

# Return the first of /dev/cdrom, /dev/cdrom1, /dev/cdrom2 that is a symlink.
# If none exists a warning is logged and '' is returned. The result is cached.
sub get_cdrom_path {
    return $cdrom_path if defined($cdrom_path);

    my @candidates = map { "/dev/cdrom$_" } ('', '1', '2');
    $cdrom_path = first { -l $_ } @candidates;
    return $cdrom_path if defined($cdrom_path);

    log_warn("no physical CD-ROM available, ignoring");
    $cdrom_path = '';

    return $cdrom_path;
}
1273
# Resolve the 'file' value of a CD-ROM drive to a path:
# 'cdrom' -> host CD-ROM device, 'none' -> '', an absolute path -> unchanged,
# anything else -> looked up as a volume ID through PVE::Storage::path().
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';
    return '' if $cdrom eq 'none';
    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path($storecfg, $cdrom);
}
1287
# try to convert old style file names to volume IDs
#
# 'none', 'cdrom', /dev/ paths and anything of the form 'x:y' are returned
# unchanged. Any other value containing a '/' is rejected (nothing is
# returned). A plain file name is mapped to "local:iso/<file>" for CD-ROM
# media and to "local:<vmid>/<file>" otherwise.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    my $needs_no_conversion = $file eq 'none'
        || $file eq 'cdrom'
        || $file =~ m|^/dev/.+|
        || $file =~ m/^([^:]+):(.+)$/;
    return $file if $needs_no_conversion;

    return if $file =~ m|/|;

    my $is_cdrom = $media && $media eq 'cdrom';
    return $is_cdrom ? "local:iso/$file" : "local:$vmid/$file";
}
1306
# Check that the storage content type $vtype fits the drive's $media setting
# ('disk' expects 'images', 'cdrom' expects 'iso'). Does nothing when $media
# is unset; raises a parameter exception keyed by $opt on a mismatch.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my %expected_vtype_for = (
        disk => 'images',
        cdrom => 'iso',
    );
    my $etype = $expected_vtype_for{$media} // die "internal error";

    return if ($vtype eq $etype);

    raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
}
1325
# Normalize $drive->{file} in place: a filesystem path is replaced by the
# matching volume ID, and 'media' is defaulted to 'cdrom' for ISO volumes and
# for the 'cdrom'/'none' keywords. Raises a parameter exception keyed by $opt
# if the path does not belong to any storage.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    # try to convert filesystem paths to volume IDs

    # keywords, /dev/ paths, 'x:y' values and pure numbers are left alone
    my $is_plain_path = $drive->{file} !~ m/^(cdrom|none)$/
        && $drive->{file} !~ m|^/dev/.+|
        && $drive->{file} !~ m/^([^:]+):(.+)$/
        && $drive->{file} !~ m/^\d+$/;

    if ($is_plain_path) {
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $drive->{file});
        if (!$vtype) {
            raise_param_exc({ $opt => "unable to associate path '$drive->{file}' to any storage"});
        }
        $drive->{media} = 'cdrom' if !$drive->{media} && $vtype eq 'iso';
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    $drive->{media} = 'cdrom' if !$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/;
}
1345
# Parse the 'hotplug' option into a hash ref of enabled features.
#
# '0' disables everything (empty hash), '1' selects the schema default list.
# Otherwise the value is split into feature names; an unknown name is fatal.
sub parse_hotplug_features {
    my ($data) = @_;

    return {} if $data eq '0';

    $data = $confdesc->{hotplug}->{default} if $data eq '1';

    my %enabled;
    for my $feature (PVE::Tools::split_list($data)) {
        my ($name) = $feature =~ m/^(network|disk|cpu|memory|usb|cloudinit)$/
            or die "invalid hotplug feature '$feature'\n";
        $enabled{$name} = 1;
    }

    return \%enabled;
}
1364
PVE::JSONSchema::register_format('pve-hotplug-features', \&pve_verify_hotplug_features);

# Verifier for the 'pve-hotplug-features' format.
#
# Returns $value if parse_hotplug_features() accepts it. The parser reports
# invalid input by dying, so the call is wrapped in eval: with $noerr set
# nothing is returned, otherwise the parser's own error is re-thrown.
sub pve_verify_hotplug_features {
    my ($value, $noerr) = @_;

    eval { parse_hotplug_features($value) };
    if (my $err = $@) {
        return if $noerr;
        die $err;
    }

    return $value;
}
1375
# Send a SCSI INQUIRY to the device behind the open file handle $fh, using the
# Linux SCSI generic (sg) ioctl interface.
#
# Returns a hash ref with the keys type, removable, vendor, product and
# revision decoded from the first 36 bytes of the response. On any failure
# it dies, or returns nothing when $noerr is set.
sub scsi_inquiry {
    my($fh, $noerr) = @_;

    my $SG_IO = 0x2285;
    my $SG_GET_VERSION_NUM = 0x2282;

    my $versionbuf = "\x00" x 8;
    my $ret = ioctl($fh, $SG_GET_VERSION_NUM, $versionbuf);
    if (!$ret) {
        die "scsi ioctl SG_GET_VERSION_NUM failed - $!\n" if !$noerr;
        return;
    }
    my $version = unpack("I", $versionbuf);
    if ($version < 30000) {
        die "scsi generic interface too old\n" if !$noerr;
        return;
    }

    my $buf = "\x00" x 36;
    my $sensebuf = "\x00" x 8;
    # 6 byte command block: opcode 0x12 in byte 0, allocation length 36 in byte 4
    my $cmd = pack("C x3 C x1", 0x12, 36);

    # see /usr/include/scsi/sg.h
    my $sg_io_hdr_t = "i i C C s I P P P I I i P C C C C S S i I I";

    # header fields in order: interface id 'S', transfer direction -3, command
    # and sense buffer lengths, iovec count 0, data length, the three buffer
    # pointers and a timeout of 6000; the remaining fields stay zero
    my $packet = pack(
        $sg_io_hdr_t, ord('S'), -3, length($cmd), length($sensebuf), 0, length($buf), $buf, $cmd, $sensebuf, 6000
    );

    $ret = ioctl($fh, $SG_IO, $packet);
    if (!$ret) {
        die "scsi ioctl SG_IO failed - $!\n" if !$noerr;
        return;
    }

    my @res = unpack($sg_io_hdr_t, $packet);
    # fields 17 and 18 of the header template must both be zero on success
    # NOTE(review): presumably host_status/driver_status - confirm against sg.h
    if ($res[17] || $res[18]) {
        die "scsi ioctl SG_IO status error - $!\n" if !$noerr;
        return;
    }

    my $res = {};
    $res->@{qw(type removable vendor product revision)} = unpack("C C x6 A8 A16 A4", $buf);

    # reduce the raw bytes: top bit of byte 1 -> boolean, low 5 bits of byte 0 -> type
    $res->{removable} = $res->{removable} & 128 ? 1 : 0;
    $res->{type} &= 0x1F;

    return $res;
}
1425
# Probe $path with scsi_inquiry() in non-fatal mode.
#
# Returns the inquiry result hash ref, or nothing if the path cannot be opened
# read/write or the inquiry fails.
sub path_is_scsi {
    my ($path) = @_;

    # pass the mode separately so that nothing in $path can be taken as part
    # of the open mode
    my $fh = IO::File->new($path, '+<') || return;
    my $res = scsi_inquiry($fh, 1);
    close($fh);

    return $res;
}
1435
# Build the QEMU device string for the USB tablet. It is attached to the EHCI
# bus on q35 machines and on aarch64, and to the UHCI bus otherwise.
sub print_tabletdevice_full {
    my ($conf, $arch) = @_;

    my $is_q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # we use uhci for old VMs because tablet driver was buggy in older qemu
    my $usbbus = ($is_q35 || $arch eq 'aarch64') ? 'ehci' : 'uhci';

    return "usb-tablet,id=tablet,bus=$usbbus.0,port=1";
}
1451
# Build the QEMU device string for a USB keyboard. Only aarch64 guests get
# one; for every other architecture nothing is returned.
sub print_keyboarddevice_full {
    my ($conf, $arch) = @_;

    if ($arch eq 'aarch64') {
        return "usb-kbd,id=keyboard,bus=ehci.0,port=2";
    }

    return;
}
1459
# Drive identifier as used in device/drive ids: interface name directly
# followed by the index, e.g. 'scsi3'.
my sub get_drive_id {
    my ($drive) = @_;

    return $drive->{interface} . $drive->{index};
}
1464
# Build the argument of QEMU's '-device' option for one guest drive.
#
# The device type and its bus placement depend on $drive->{interface}
# (virtio, scsi, ide or sata). Optional properties (iothread, ssd, wwn, model,
# bootindex, serial) are appended when set on the drive. Dies for 'usb' and
# for unknown interfaces.
sub print_drivedevice_full {
    my ($storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type) = @_;

    my $device = '';

    my $drive_id = get_drive_id($drive);
    if ($drive->{interface} eq 'virtio') {
        my $pciaddr = print_pci_addr("$drive_id", $bridges, $arch, $machine_type);
        $device = "virtio-blk-pci,drive=drive-$drive_id,id=${drive_id}${pciaddr}";
        $device .= ",iothread=iothread-$drive_id" if $drive->{iothread};
    } elsif ($drive->{interface} eq 'scsi') {

        my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = 'hd';
        my $path = '';
        if (drive_is_cdrom($drive)) {
            $devicetype = 'cd';
        } else {
            if ($drive->{file} =~ m|^/|) {
                # host path: ask the device itself what it is
                $path = $drive->{file};
                if (my $info = path_is_scsi($path)) {
                    if ($info->{type} == 0 && $drive->{scsiblock}) {
                        $devicetype = 'block';
                    } elsif ($info->{type} == 1) { # tape
                        $devicetype = 'generic';
                    }
                }
            } else {
                $path = PVE::Storage::path($storecfg, $drive->{file});
            }

            # for compatibility only, we prefer scsi-hd (#2408, #2355, #2380)
            my $version = extract_version($machine_type, kvm_user_version());
            if ($path =~ m/^iscsi\:\/\// &&
                !min_version($version, 4, 1)) {
                $devicetype = 'generic';
            }
        }

        # lsi* (also the fallback when scsihw is unset) and pvscsi address the
        # disk by scsi-id, all other controllers by lun
        if (!$conf->{scsihw} || $conf->{scsihw} =~ m/^lsi/ || $conf->{scsihw} eq 'pvscsi') {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,scsi-id=$unit";
        } else {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,channel=0,scsi-id=0"
                .",lun=$drive->{index}";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($drive->{ssd} && ($devicetype eq 'block' || $devicetype eq 'hd')) {
            $device .= ",rotation_rate=1";
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};

    } elsif ($drive->{interface} eq 'ide' || $drive->{interface} eq 'sata') {
        my $maxdev = ($drive->{interface} eq 'sata') ? $PVE::QemuServer::Drive::MAX_SATA_DISKS : 2;
        my $controller = int($drive->{index} / $maxdev);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = ($drive->{media} && $drive->{media} eq 'cdrom') ? "cd" : "hd";

        $device = "ide-$devicetype";
        if ($drive->{interface} eq 'ide') {
            $device .= ",bus=ide.$controller,unit=$unit";
        } else {
            $device .= ",bus=ahci$controller.$unit";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($devicetype eq 'hd') {
            # the model string is stored URI-escaped in the config
            if (my $model = $drive->{model}) {
                $model = URI::Escape::uri_unescape($model);
                $device .= ",model=$model";
            }
            if ($drive->{ssd}) {
                $device .= ",rotation_rate=1";
            }
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};
    } elsif ($drive->{interface} eq 'usb') {
        die "implement me";
        # -device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0
    } else {
        die "unsupported interface type";
    }

    $device .= ",bootindex=$drive->{bootindex}" if $drive->{bootindex};

    # the serial is stored URI-escaped in the config as well
    if (my $serial = $drive->{serial}) {
        $serial = URI::Escape::uri_unescape($serial);
        $device .= ",serial=$serial";
    }


    return $device;
}
1560
# Read the iSCSI initiator name from /etc/iscsi/initiatorname.iscsi.
# Returns the value of the first 'InitiatorName=' line, undef if there is
# none, and nothing if the file cannot be opened.
sub get_initiator_name {
    my $fh = IO::File->new('/etc/iscsi/initiatorname.iscsi') || return;

    my $initiator;
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\s*InitiatorName\s*=\s*([\.\-:\w]+)/) {
            $initiator = $1;
            last;
        }
    }
    $fh->close();

    return $initiator;
}
1574
# Decide whether io_uring may be the default AIO mode for a drive on the
# storage described by $scfg. Returns 1 if so and nothing otherwise; a missing
# storage config never vetoes io_uring.
my sub storage_allows_io_uring_default {
    my ($scfg, $cache_direct) = @_;

    if ($scfg) {
        my $type = $scfg->{type};

        # io_uring with cache mode writeback or writethrough on krbd will hang...
        return if $type eq 'rbd' && $scfg->{krbd} && !$cache_direct;

        # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
        # sometimes, just plain disable...
        return if $type eq 'lvm';

        # io_uring causes problems when used with CIFS since kernel 5.15
        # Some discussion: https://www.spinics.net/lists/linux-cifs/msg26734.html
        return if $type eq 'cifs';
    }

    return 1;
}
1591
# Tell whether the drive bypasses the host page cache.
#
# An explicit cache mode decides on its own (off, none and directsync count as
# direct). Without one, every drive is direct except CD-ROMs and drives on a
# btrfs storage that does not have 'nocow' set.
my sub drive_uses_cache_direct {
    my ($drive, $scfg) = @_;

    if (my $mode = $drive->{cache}) {
        my $is_direct = $mode =~ /^(?:off|none|directsync)$/;
        return $is_direct;
    }

    return 0 if drive_is_cdrom($drive);

    my $btrfs_with_cow = $scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow};
    return $btrfs_with_cow ? 0 : 1;
}
1605
# Build the argument of QEMU's '-drive' option for one guest drive.
#
# $pbs_name, if set, names a PBS block node that backs the drive; the drive is
# then wrapped in an 'alloc-track' layer. $io_uring tells whether io_uring may
# be picked as the default AIO mode. Dies if a CD-ROM is combined with
# $pbs_name, or if a PBS backed drive has no known format.
sub print_drive_commandline_full {
    my ($storecfg, $vmid, $drive, $pbs_name, $io_uring) = @_;

    my $path;
    my $volid = $drive->{file};
    my $format = $drive->{format};
    my $drive_id = get_drive_id($drive);

    # $storeid stays undef when $volid is not a volume ID (second argument = noerr)
    my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
    my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;

    if (drive_is_cdrom($drive)) {
        $path = get_iso_path($storecfg, $vmid, $volid);
        die "$drive_id: cannot back cdrom drive with PBS snapshot\n" if $pbs_name;
    } else {
        if ($storeid) {
            $path = PVE::Storage::path($storecfg, $volid);
            $format //= qemu_img_format($scfg, $volname);
        } else {
            $path = $volid;
            $format //= "raw";
        }
    }

    my $is_rbd = $path =~ m/^rbd:/;

    # options that are passed through to QEMU under the same name
    my $opts = '';
    my @qemu_drive_options = qw(heads secs cyls trans media cache rerror werror aio discard);
    foreach my $o (@qemu_drive_options) {
        $opts .= ",$o=$drive->{$o}" if defined($drive->{$o});
    }

    # snapshot only accepts on|off
    if (defined($drive->{snapshot})) {
        my $v = $drive->{snapshot} ? 'on' : 'off';
        $opts .= ",snapshot=$v";
    }

    if (defined($drive->{ro})) { # ro maps to QEMUs `readonly`, which accepts `on` or `off` only
        $opts .= ",readonly=" . ($drive->{ro} ? 'on' : 'off');
    }

    # throttling: each pair is (config key suffix, QEMU option suffix);
    # mbps values are converted to bytes per second
    foreach my $type (['', '-total'], [_rd => '-read'], [_wr => '-write']) {
        my ($dir, $qmpname) = @$type;
        if (my $v = $drive->{"mbps$dir"}) {
            $opts .= ",throttling.bps$qmpname=".int($v*1024*1024);
        }
        if (my $v = $drive->{"mbps${dir}_max"}) {
            $opts .= ",throttling.bps$qmpname-max=".int($v*1024*1024);
        }
        if (my $v = $drive->{"bps${dir}_max_length"}) {
            $opts .= ",throttling.bps$qmpname-max-length=$v";
        }
        if (my $v = $drive->{"iops${dir}"}) {
            $opts .= ",throttling.iops$qmpname=$v";
        }
        if (my $v = $drive->{"iops${dir}_max"}) {
            $opts .= ",throttling.iops$qmpname-max=$v";
        }
        if (my $v = $drive->{"iops${dir}_max_length"}) {
            $opts .= ",throttling.iops$qmpname-max-length=$v";
        }
    }

    if ($pbs_name) {
        # with a PBS backing the real format moves one level down to 'file.driver'
        $format = "rbd" if $is_rbd;
        die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n"
            if !$format;
        $opts .= ",format=alloc-track,file.driver=$format";
    } elsif ($format) {
        $opts .= ",format=$format";
    }

    my $cache_direct = drive_uses_cache_direct($drive, $scfg);

    $opts .= ",cache=none" if !$drive->{cache} && $cache_direct;

    # only pick an AIO mode if the user did not configure one
    if (!$drive->{aio}) {
        if ($io_uring && storage_allows_io_uring_default($scfg, $cache_direct)) {
            # io_uring supports all cache modes
            $opts .= ",aio=io_uring";
        } else {
            # aio native works only with O_DIRECT
            if($cache_direct) {
                $opts .= ",aio=native";
            } else {
                $opts .= ",aio=threads";
            }
        }
    }

    if (!drive_is_cdrom($drive)) {
        my $detectzeroes;
        if (defined($drive->{detect_zeroes}) && !$drive->{detect_zeroes}) {
            $detectzeroes = 'off';
        } elsif ($drive->{discard}) {
            $detectzeroes = $drive->{discard} eq 'on' ? 'unmap' : 'on';
        } else {
            # This used to be our default with discard not being specified:
            $detectzeroes = 'on';
        }

        # note: 'detect-zeroes' works per blockdev and we want it to persist
        # after the alloc-track is removed, so put it on 'file' directly
        my $dz_param = $pbs_name ? "file.detect-zeroes" : "detect-zeroes";
        $opts .= ",$dz_param=$detectzeroes" if $detectzeroes;
    }

    if ($pbs_name) {
        $opts .= ",backing=$pbs_name";
        $opts .= ",auto-remove=on";
    }

    # my $file_param = $pbs_name ? "file.file.filename" : "file";
    my $file_param = "file";
    if ($pbs_name) {
        # non-rbd drivers require the underlying file to be a separate block
        # node, so add a second .file indirection
        $file_param .= ".file" if !$is_rbd;
        $file_param .= ".filename";
    }
    # no file parameter at all when the path is empty
    my $pathinfo = $path ? "$file_param=$path," : '';

    return "${pathinfo}if=none,id=drive-$drive->{interface}$drive->{index}$opts";
}
1731
# Build the '-blockdev' argument for a read-only PBS block node named
# $pbs_name. 'namespace' and 'keyfile' are only emitted when set in $pbs_conf.
sub print_pbs_blockdev {
    my ($pbs_conf, $pbs_name) = @_;

    my @props = (
        "driver=pbs",
        "node-name=$pbs_name",
        "read-only=on",
        "repository=$pbs_conf->{repository}",
    );
    push @props, "namespace=$pbs_conf->{namespace}" if $pbs_conf->{namespace};
    push @props, "snapshot=$pbs_conf->{snapshot}", "archive=$pbs_conf->{archive}";
    push @props, "keyfile=$pbs_conf->{keyfile}" if $pbs_conf->{keyfile};

    return join(',', @props);
}
1742
# Build the argument of QEMU's '-device' option for one guest NIC.
#
# The model 'virtio' is mapped to 'virtio-net-pci'; other models are used as
# device names directly. Multiqueue, queue size, bootindex, MTU and legacy PXE
# ROM properties are appended as applicable. Dies if a requested MTU is below
# 576 or above the bridge MTU.
sub print_netdevice_full {
    my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version) = @_;

    my $is_virtio = $net->{model} eq 'virtio';
    my $device = $is_virtio ? 'virtio-net-pci' : $net->{model};

    my $pciaddr = print_pci_addr("$netid", $bridges, $arch, $machine_type);
    my $tmpstr = "$device,mac=$net->{macaddr},netdev=$netid$pciaddr,id=$netid";

    my $at_least_7_1 = min_version($machine_version, 7, 1);

    if ($net->{queues} && $net->{queues} > 1 && $is_virtio) {
        # Consider we have N queues, the number of vectors needed is 2 * N + 2, i.e., one per in
        # and out of each queue plus one config interrupt and control vector queue
        my $vectors = $net->{queues} * 2 + 2;
        $tmpstr .= ",vectors=$vectors,mq=on";
        $tmpstr .= ",packed=on" if $at_least_7_1;
    }

    $tmpstr .= ",rx_queue_size=1024,tx_queue_size=1024" if $at_least_7_1 && $is_virtio;

    $tmpstr .= ",bootindex=$net->{bootindex}" if $net->{bootindex};

    if (my $mtu = $net->{mtu}) {
        if ($is_virtio && $net->{bridge}) {
            my $bridge_mtu = PVE::Network::read_bridge_mtu($net->{bridge});
            if ($mtu == 1) {
                # the special value 1 means: inherit the MTU of the bridge
                $mtu = $bridge_mtu;
            } else {
                die "netdev $netid: MTU '$mtu' is smaller than the IP minimum MTU '576'\n"
                    if $mtu < 576;
                die "netdev $netid: MTU '$mtu' is bigger than the bridge MTU '$bridge_mtu'\n"
                    if $mtu > $bridge_mtu;
            }
            $tmpstr .= ",host_mtu=$mtu";
        } else {
            warn "WARN: netdev $netid: ignoring MTU '$mtu', not using VirtIO or no bridge configured.\n";
        }
    }

    if ($use_old_bios_files) {
        # legacy PXE ROM per device model; unknown models get no romfile
        my %romfile_for = (
            'virtio-net-pci' => 'pxe-virtio.rom',
            'e1000' => 'pxe-e1000.rom',
            'e1000e' => 'pxe-e1000e.rom',
            'ne2k' => 'pxe-ne2k_pci.rom',
            'pcnet' => 'pxe-pcnet.rom',
            'rtl8139' => 'pxe-rtl8139.rom',
        );
        my $romfile = $romfile_for{$device};
        $tmpstr .= ",romfile=$romfile" if $romfile;
    }

    return $tmpstr;
}
1805
# Build the argument of QEMU's '-netdev' option for one guest NIC.
#
# $netid must be of the form 'net<N>' with N below $MAX_NETS; anything else is
# rejected, since N is needed to derive the tap interface name
# "tap<vmid>i<N>". A NIC with a bridge becomes a tap device using the
# pve-bridge scripts (the hotplug variant if $hotplug is set); without a
# bridge user-mode networking is used.
sub print_netdev_full {
    my ($vmid, $conf, $arch, $net, $netid, $hotplug) = @_;

    # an id that is not 'net<N>' must fail here rather than silently produce
    # an interface name without index
    my ($i) = $netid =~ m/^net(\d+)$/;
    die "got strange net id '$netid'\n" if !defined($i);
    $i = int($i);

    die "got strange net id '$i'\n" if $i >= ${MAX_NETS};

    my $ifname = "tap${vmid}i$i";

    # kvm uses TUNSETIFF ioctl, and that limits ifname length
    die "interface name '$ifname' is too long (max 15 character)\n"
        if length($ifname) >= 16;

    # vhost is only offered for virtio NICs of guests with the host's architecture
    my $vhostparam = '';
    if (is_native($arch)) {
        $vhostparam = ',vhost=on' if kernel_has_vhost_net() && $net->{model} eq 'virtio';
    }

    my $vmname = $conf->{name} || "vm$vmid";

    my $netdev = "";
    my $script = $hotplug ? "pve-bridge-hotplug" : "pve-bridge";

    if ($net->{bridge}) {
        $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/$script"
            .",downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam";
    } else {
        $netdev = "type=user,id=$netid,hostname=$vmname";
    }

    $netdev .= ",queues=$net->{queues}" if ($net->{queues} && $net->{model} eq 'virtio');

    return $netdev;
}
1843
# Maps the configured display type to the QEMU device name.
my $vga_map = {
    'cirrus' => 'cirrus-vga',
    'std' => 'VGA',
    'vmware' => 'vmware-svga',
    'virtio' => 'virtio-vga',
    'virtio-gl' => 'virtio-vga-gl',
};

# Build the argument of QEMU's '-device' option for a display adapter.
#
# $vga is the parsed 'vga' option, $id the index of an additional display
# (undef for the first one) and $qxlnum is true when QXL displays are used.
# Dies if no device type is known for $vga->{type}, and for 'virtio-gl' if the
# GL libraries or a DRM render node are missing on the host.
sub print_vga_device {
    my ($conf, $vga, $arch, $machine_version, $machine, $id, $qxlnum, $bridges) = @_;

    my $type = $vga_map->{$vga->{type}};
    if ($arch eq 'aarch64' && defined($type) && $type eq 'virtio-vga') {
        $type = 'virtio-gpu';
    }
    my $vgamem_mb = $vga->{memory};

    my $max_outputs = '';
    if ($qxlnum) {
        $type = $id ? 'qxl' : 'qxl-vga';

        # both alternatives share the anchors, so only exactly 'l<NN>' or
        # 'other' match
        if (!$conf->{ostype} || $conf->{ostype} =~ m/^(?:l\d\d|other)$/) {
            # set max outputs so linux can have up to 4 qxl displays with one device
            if (min_version($machine_version, 4, 1)) {
                $max_outputs = ",max_outputs=4";
            }
        }
    }

    die "no devicetype for $vga->{type}\n" if !$type;

    my $memory = "";
    if ($vgamem_mb) {
        if ($vga->{type} =~ /^virtio/) {
            my $bytes = PVE::Tools::convert_size($vgamem_mb, "mb" => "b");
            $memory = ",max_hostmem=$bytes";
        } elsif ($qxlnum) {
            # from https://www.spice-space.org/multiple-monitors.html
            $memory = ",vgamem_mb=$vga->{memory}";
            my $ram = $vgamem_mb * 4;
            my $vram = $vgamem_mb * 2;
            $memory .= ",ram_size_mb=$ram,vram_size_mb=$vram";
        } else {
            $memory = ",vgamem_mb=$vga->{memory}";
        }
    } elsif ($qxlnum && $id) {
        # fixed sizes (64 MiB ram, 32 MiB vram) for additional QXL displays
        $memory = ",ram_size=67108864,vram_size=33554432";
    }

    # EDID is switched off for the standard VGA of Windows guests not using OVMF
    my $edidoff = "";
    if ($type eq 'VGA' && windows_version($conf->{ostype})) {
        $edidoff=",edid=off" if (!defined($conf->{bios}) || $conf->{bios} ne 'ovmf');
    }

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $vgaid = "vga" . ($id // '');
    my $pciaddr;
    if ($q35 && $vgaid eq 'vga') {
        # the first display uses pcie.0 bus on q35 machines
        $pciaddr = print_pcie_addr($vgaid, $bridges, $arch, $machine);
    } else {
        $pciaddr = print_pci_addr($vgaid, $bridges, $arch, $machine);
    }

    if ($vga->{type} eq 'virtio-gl') {
        my $base = '/usr/lib/x86_64-linux-gnu/lib';
        die "missing libraries for '$vga->{type}' detected! Please install 'libgl1' and 'libegl1'\n"
            if !-e "${base}EGL.so.1" || !-e "${base}GL.so.1";

        die "no DRM render node detected (/dev/dri/renderD*), no GPU? - needed for '$vga->{type}' display\n"
            if !PVE::Tools::dir_glob_regex('/dev/dri/', "renderD.*");
    }

    return "$type,id=${vgaid}${memory}${max_outputs}${pciaddr}${edidoff}";
}
1919
# Parse a ';' separated list of numbers and ranges ("0-3;5") into an array ref
# of [start, end] pairs; end is undef for a single number. Dies on malformed
# parts and on ranges whose end is smaller than their start.
sub parse_number_sets {
    my ($set) = @_;

    my @ranges;
    for my $part (split(/;/, $set)) {
        my ($start, $end) = $part =~ /^\s*(\d+)(?:-(\d+))?\s*$/;
        die "invalid range: $part\n" if !defined($start);
        die "invalid range: $part ($end < $start)\n" if defined($end) && $end < $start;
        push @ranges, [ $start, $end ];
    }

    return \@ranges;
}
1933
# Parse a 'numa<N>' property string. The 'cpus' and 'hostnodes' values, when
# present, are expanded into lists of [start, end] pairs.
sub parse_numa {
    my ($data) = @_;

    my $res = parse_property_string($numa_fmt, $data);
    for my $key (qw(cpus hostnodes)) {
        next if !defined($res->{$key});
        $res->{$key} = parse_number_sets($res->{$key});
    }

    return $res;
}
1942
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
#
# Parse a 'net<N>' property string. A parse error is only warned about and
# nothing is returned. Unless $disable_mac_autogen is set, a missing MAC
# address is filled in with a random one that uses the datacenter's MAC prefix.
sub parse_net {
    my ($data, $disable_mac_autogen) = @_;

    my $net = eval { parse_property_string($net_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    return $net if defined($net->{macaddr}) || $disable_mac_autogen;

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
    $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix});

    return $net;
}
1958
# ipconfigX ip=cidr,gw=ip,ip6=cidr,gw6=ip
#
# Parse an 'ipconfig<N>' property string. Parse errors and inconsistent
# gateway/address combinations are only warned about and nothing is returned.
# If neither an IPv4 nor an IPv6 address is given, DHCP is returned for both.
sub parse_ipconfig {
    my ($data) = @_;

    my $res = eval { parse_property_string($ipconfig_fmt, $data) };
    if ($@) {
        warn $@;
        return;
    }

    # first matching rule wins
    my $problem;
    if ($res->{gw} && !$res->{ip}) {
        $problem = 'gateway specified without specifying an IP address';
    } elsif ($res->{gw6} && !$res->{ip6}) {
        $problem = 'IPv6 gateway specified without specifying an IPv6 address';
    } elsif ($res->{gw} && $res->{ip} eq 'dhcp') {
        $problem = 'gateway specified together with DHCP';
    } elsif ($res->{gw6} && $res->{ip6} !~ /^$IPV6RE/) {
        # gw6 + auto/dhcp
        $problem = "IPv6 gateway specified together with $res->{ip6} address";
    }
    if (defined($problem)) {
        warn $problem;
        return;
    }

    return { ip => 'dhcp', ip6 => 'dhcp' } if !$res->{ip} && !$res->{ip6};

    return $res;
}
1993
# Serialize a parsed NIC hash back into its 'net<N>' property string.
sub print_net {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $net_fmt);
}
1999
# Re-serialize every parsable 'net<N>' entry of $settings in place. Going
# through parse_net() adds a generated MAC address to entries lacking one.
sub add_random_macs {
    my ($settings) = @_;

    for my $opt (grep { m/^net(\d+)$/ } keys %$settings) {
        my $net = parse_net($settings->{$opt}) or next;
        $settings->{$opt} = print_net($net);
    }
}
2010
# Return 1 if the storage layer reports $vmid as owner of volume $volid.
# Absolute paths are never owned, and lookup errors count as "not owned";
# in both cases nothing is returned.
sub vm_is_volid_owner {
    my ($storecfg, $vmid, $volid) = @_;

    return if $volid =~ m|^/|;

    my ($path, $owner) = eval { PVE::Storage::path($storecfg, $volid) };

    return 1 if $owner && ($owner == $vmid);
    return;
}
2024
# Handle a drive that was removed from the VM config.
#
# A cloud-init drive is freed right away (errors are only warned about) and
# the 'cloudinit' key is dropped from $conf. Any other non-CD-ROM volume is
# recorded as unused volume, provided it is owned by this VM.
sub vmconfig_register_unused_drive {
    my ($storecfg, $vmid, $conf, $drive) = @_;

    if (drive_is_cloudinit($drive)) {
        eval { PVE::Storage::vdisk_free($storecfg, $drive->{file}) };
        warn $@ if $@;
        delete $conf->{cloudinit};
    } elsif (!drive_is_cdrom($drive)) {
        my $volid = $drive->{file};
        PVE::QemuConfig->add_unused_volume($conf, $volid, $vmid)
            if vm_is_volid_owner($storecfg, $vmid, $volid);
    }
}
2039
# smbios: [manufacturer=str][,product=str][,version=str][,serial=str][,uuid=uuid][,sku=str][,family=str][,base64=bool]
#
# Property-string schema for the 'smbios1' option. All free-text fields share
# the same schema apart from their description, so they are generated by a
# small helper.
my $smbios1_fmt = do {
    my $text_field = sub {
        my ($description) = @_;
        return {
            type => 'string',
            pattern => '[A-Za-z0-9+\/]+={0,2}',
            format_description => 'Base64 encoded string',
            description => $description,
            optional => 1,
        };
    };

    my %fmt = (
        uuid => {
            type => 'string',
            pattern => '[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}',
            format_description => 'UUID',
            description => "Set SMBIOS1 UUID.",
            optional => 1,
        },
        version => $text_field->("Set SMBIOS1 version."),
        serial => $text_field->("Set SMBIOS1 serial number."),
        manufacturer => $text_field->("Set SMBIOS1 manufacturer."),
        product => $text_field->("Set SMBIOS1 product ID."),
        sku => $text_field->("Set SMBIOS1 SKU string."),
        family => $text_field->("Set SMBIOS1 family string."),
        base64 => {
            type => 'boolean',
            description => 'Flag to indicate that the SMBIOS values are base64 encoded',
            optional => 1,
        },
    );

    \%fmt;
};
2097
# Parse an 'smbios1' property string. A parse error is only warned about, in
# which case the result is undefined.
sub parse_smbios1 {
    my ($data) = @_;

    my $smbios1 = eval { parse_property_string($smbios1_fmt, $data) };
    if (my $err = $@) {
        warn $err;
    }

    return $smbios1;
}
2105
# Serialize a parsed smbios1 hash back into its property-string form.
sub print_smbios1 {
    my ($smbios1) = @_;

    return PVE::JSONSchema::print_property_string($smbios1, $smbios1_fmt);
}

# make the smbios1 schema available under the 'pve-qm-smbios1' format name
PVE::JSONSchema::register_format('pve-qm-smbios1', $smbios1_fmt);
2112
# Parse a 'watchdog' property string.
#
# Returns nothing for an empty/false value. A parse error is only warned
# about, in which case the result is undefined.
sub parse_watchdog {
    my ($value) = @_;

    return if !$value;

    my $watchdog = eval { parse_property_string($watchdog_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $watchdog;
}
2122
# Parse the 'agent' option of a VM config.
#
# Always returns a hash reference: an empty one if the option is unset, fails
# to parse (the error is warned about) or has the agent disabled.
sub parse_guest_agent {
    my ($conf) = @_;

    my $agent_str = $conf->{agent};
    return {} if !defined($agent_str);

    my $agent = eval { parse_property_string($agent_fmt, $agent_str) };
    if (my $err = $@) {
        warn $err;
    }

    # if the agent is disabled ignore the other potentially set properties
    return $agent->{enabled} ? $agent : {};
}
2135
# Look up a single property of the guest agent configuration.
#
# Returns undef (also in list context) if no 'agent' option is configured,
# otherwise whatever parse_guest_agent() yields for $key.
sub get_qga_key {
    my ($conf, $key) = @_;

    # explicit undef on purpose: callers may use the result in list context
    return undef if !defined($conf->{agent});

    return parse_guest_agent($conf)->{$key};
}
2143
# Parse a 'vga' property string.
#
# Returns an empty hash reference for an empty/false value. A parse error is
# only warned about, in which case the result is undefined.
sub parse_vga {
    my ($value) = @_;

    return {} if !$value;

    my $vga = eval { parse_property_string($vga_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $vga;
}
2152
# Parse an 'rng0' property string.
#
# Returns nothing for an empty/false value. A parse error is only warned
# about, in which case the result is undefined.
sub parse_rng {
    my ($value) = @_;

    return if !$value;

    my $rng = eval { parse_property_string($rng_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $rng;
}
2162
# Parse the 'meta' property string of a VM config.
#
# Returns nothing for an empty/false value. A parse error is only warned
# about, in which case the result is undefined.
sub parse_meta_info {
    my ($value) = @_;

    return if !$value;

    my $meta = eval { parse_property_string($meta_info_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $meta;
}
2172
# Build the 'meta' property string for a newly created VM, recording the
# QEMU version reported by kvm_user_version() and the current time as ctime.
sub new_meta_info_string {
    my () = @_; # for now do not allow to override any value

    my $meta = {
        'creation-qemu' => kvm_user_version(),
        ctime => "". int(time()),
    };

    return PVE::JSONSchema::print_property_string($meta, $meta_info_fmt);
}
2184
# Return extra QEMU command line arguments (array reference) needed to keep
# the virtual hardware stable for VMs that were created with an older QEMU
# version but always run the latest machine version. Returns nothing if no
# fixup applies.
#
# This handles machine version transitions that affected HW such that, e.g.,
# an OS config change would be required (we do not want to pin the machine
# version for non-windows OS types).
sub qemu_created_version_fixups {
    my ($conf, $forcemachine, $kvmver) = @_;

    my $meta = parse_meta_info($conf->{meta}) // {};
    my $forced_vers = PVE::QemuServer::Machine::extract_version($forcemachine);

    my $machine = $conf->{machine};
    my $created_with = $meta->{'creation-qemu'};

    # only relevant for non-versioned machine types
    return if defined($machine) && $machine !~ m/^(?:pc|q35|virt)$/;
    # only relevant for VMs created before 6.1 (or with unknown creation version)
    return if defined($created_with) && min_version($created_with, 6, 1);
    # handle snapshot-rollback/migrations: a forced older machine needs no fixup
    return if $forced_vers && !min_version($forced_vers, 6, 1);
    # only need to apply the change since 6.1
    return if !min_version($kvmver, 6, 1);

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $ostype = $conf->{ostype};
    if ($q35 && $ostype && $ostype eq 'l26') {
        # this changed to default-on in Q 6.1 for q35 machines, it will mess with PCI slot view
        # and thus with the predictable interface naming of systemd
        return ['-global', 'ICH9-LPC.acpi-pci-hotplug-with-bridge-support=off'];
    }

    return;
}
2209
# add JSON properties for create and set function
#
# Copies the schema of every config option from $confdesc into $prop, except
# for the options listed below. With $with_disk_alloc set, drive options use
# the schema variant from $PVE::QemuServer::Drive::drivedesc_hash_with_alloc.
# Returns $prop.
sub json_config_properties {
    my ($prop, $with_disk_alloc) = @_;

    # options that are never exposed through the create/set schema
    my %skipped = map { $_ => 1 } qw(
        parent
        snaptime
        vmstate
        runningmachine
        runningcpu
        meta
    );

    for my $opt (keys %$confdesc) {
        next if $skipped{$opt};

        my $use_alloc_schema = $with_disk_alloc && is_valid_drivename($opt);
        $prop->{$opt} = $use_alloc_schema
            ? $PVE::QemuServer::Drive::drivedesc_hash_with_alloc->{$opt}
            : $confdesc->{$opt};
    }

    return $prop;
}
2235
# Properties that we can read from an OVF file
#
# Returns a fresh schema hash with one optional disk-image entry per valid
# drive name, plus 'cores', 'memory' and 'name'.
sub json_ovf_properties {
    my %prop;

    for my $drive_name (PVE::QemuServer::Drive::valid_drive_names()) {
        $prop{$drive_name} = {
            type => 'string',
            format => 'pve-volume-id-or-absolute-path',
            description => "Disk image that gets imported to $drive_name",
            optional => 1,
        };
    }

    $prop{cores} = {
        type => 'integer',
        description => "The number of CPU cores.",
        optional => 1,
    };
    $prop{memory} = {
        type => 'integer',
        description => "Amount of RAM for the VM in MB.",
        optional => 1,
    };
    $prop{name} = {
        type => 'string',
        description => "Name of the VM.",
        optional => 1,
    };

    return \%prop;
}
2267
# return copy of $confdesc_cloudinit to generate documentation
#
# The copy is a deep one, so callers may modify it freely.
sub cloudinit_config_properties {
    my $copy = dclone($confdesc_cloudinit);
    return $copy;
}
2273
# Return a lookup hash (option name => 1) containing all cloud-init options
# from $confdesc_cloudinit, 'name' and every possible 'netN' option.
sub cloudinit_pending_properties {
    my %pending = (name => 1);

    $pending{$_} = 1 for keys %$confdesc_cloudinit;
    $pending{"net$_"} = 1 for 0 .. ($MAX_NETS - 1);

    return \%pending;
}
2282
# Validate $value against the basic type of config option $key and return the
# normalized value.
#
# - boolean: 1/on/yes/true => 1, 0/off/no/false => 0 (case-insensitive)
# - integer: unsigned decimal digits only
# - number:  unsigned decimal with optional fraction
# - string:  checked via PVE::JSONSchema::check_format() if the option has a
#            format, otherwise one pair of surrounding double quotes is removed
#
# Dies for unknown options, undefined values, values containing a line break
# and values that do not match the expected type.
sub check_type {
    my ($key, $value) = @_;

    my $desc = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$desc;

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    my $type = $desc->{type};

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    if ($type eq 'string') {
        my $fmt = $desc->{format};
        if ($fmt) {
            PVE::JSONSchema::check_format($fmt, $value);
        } else {
            $value =~ s/^\"(.*)\"$/$1/;
        }
        return $value;
    }

    die "internal error"
}
2319
# Destroy a VM: free the volumes it owns and remove (or replace) its config.
#
# Parameters:
#   $storecfg           - storage configuration
#   $vmid               - ID of the VM to destroy
#   $skiplock           - if true, do not check the config lock
#   $replacement_conf   - if defined, written as the new config of $vmid
#                         instead of removing the config
#   $purge_unreferenced - if true, also free all images the storage layer
#                         lists for $vmid, even when the config does not
#                         reference them
#
# Dies if the VM is locked (unless $skiplock is set or the lock is
# 'suspended') or if it is a template whose base volumes are still used by a
# linked clone. Failures to free individual volumes only produce warnings.
sub destroy_vm {
    my ($storecfg, $vmid, $skiplock, $replacement_conf, $purge_unreferenced) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    if (!$skiplock && !PVE::QemuConfig->has_lock($conf, 'suspended')) {
        PVE::QemuConfig->check_lock($conf);
    }

    if ($conf->{template}) {
        # check if any base image is still used by a linked clone
        PVE::QemuConfig->foreach_volume_full($conf, { include_unused => 1 }, sub {
            my ($ds, $drive) = @_;
            return if drive_is_cdrom($drive);

            my $volid = $drive->{file};
            return if !$volid || $volid =~ m|^/|;

            die "base volume '$volid' is still in use by a linked clone\n"
                if PVE::Storage::volume_is_base_and_used($storecfg, $volid);
        });
    }

    # volumes already handled, so each one is only freed once even if it is
    # referenced from several places (config, pending changes, snapshots)
    my $volids = {};
    my $remove_owned_drive = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;
        return if $volids->{$volid};

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        $volids->{$volid} = 1;
        eval { PVE::Storage::vdisk_free($storecfg, $volid) };
        warn "Could not remove disk '$volid', check manually: $@" if $@;
    };

    # only remove disks owned by this VM (referenced in the config)
    my $include_opts = {
        include_unused => 1,
        extra_keys => ['vmstate'],
    };
    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $remove_owned_drive);

    # saved VM state volumes of snapshots
    for my $snap (values %{$conf->{snapshots}}) {
        next if !defined($snap->{vmstate});
        my $drive = PVE::QemuConfig->parse_volume('vmstate', $snap->{vmstate}, 1);
        next if !defined($drive);
        $remove_owned_drive->('vmstate', $drive);
    }

    PVE::QemuConfig->foreach_volume_full($conf->{pending}, $include_opts, $remove_owned_drive);

    if ($purge_unreferenced) { # also remove unreferenced disk
        my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid, undef, 'images');
        PVE::Storage::foreach_volid($vmdisks, sub {
            my ($volid, $sid, $volname, $d) = @_;
            eval { PVE::Storage::vdisk_free($storecfg, $volid) };
            warn $@ if $@;
        });
    }

    if (defined $replacement_conf) {
        PVE::QemuConfig->write_config($vmid, $replacement_conf);
    } else {
        PVE::QemuConfig->destroy_config($vmid);
    }
}
2392
# Parse the raw content of a VM configuration file.
#
# Parameters:
#   $filename - path of the config file, must end in /qemu-server/<vmid>.conf
#   $raw      - file content; nothing is returned if this is undefined
#   $strict   - if true, parse errors are fatal instead of being warned about
#
# Returns a hash reference with the options of the main section plus:
#   digest    - SHA1 hex digest of $raw
#   pending   - options from the [PENDING] section
#   cloudinit - options from the [special:cloudinit] section (not validated)
#   snapshots - one hash per snapshot section, keyed by snapshot name
#
# Lines starting with '#' and 'description:' lines are collected into the
# 'description' of the section they appear in.
sub parse_vm_config {
    my ($filename, $raw, $strict) = @_;

    return if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
        pending => {},
        cloudinit => {},
    };

    my $handle_error = sub {
        my ($msg) = @_;

        if ($strict) {
            die $msg;
        } else {
            warn $msg;
        }
    };

    $filename =~ m|/qemu-server/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    # $conf always points at the hash of the section currently being parsed
    my $conf = $res;
    my $descr;
    my $finish_description = sub {
        if (defined($descr)) {
            $descr =~ s/\s+$//;
            $conf->{description} = $descr;
        }
        $descr = undef;
    };
    my $section = '';

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        # section headers: store the description collected so far in the
        # previous section before switching to the new one
        if ($line =~ m/^\[PENDING\]\s*$/i) {
            $section = 'pending';
            $finish_description->();
            $conf = $res->{$section} = {};
            next;
        } elsif ($line =~ m/^\[special:cloudinit\]\s*$/i) {
            $section = 'cloudinit';
            $finish_description->();
            $conf = $res->{$section} = {};
            next;
        } elsif ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            $section = $1;
            $finish_description->();
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # anchored at the line start like all other keys, so that e.g. an
            # 'args:' value ending in "snapstate: delete" is not misparsed
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            my $key = $1;
            my $value = $2;
            $conf->{$key} = $value;
        } elsif ($line =~ m/^delete:\s*(.*\S)\s*$/) {
            my $value = $1;
            if ($section eq 'pending') {
                $conf->{delete} = $value; # we parse this later
            } else {
                $handle_error->("vm $vmid - property 'delete' is only allowed in [PENDING]\n");
            }
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(.+?)\s*$/) {
            my $key = $1;
            my $value = $2;
            if ($section eq 'cloudinit') {
                # ignore validation only used for informative purpose
                $conf->{$key} = $value;
                next;
            }
            eval { $value = check_type($key, $value); };
            if ($@) {
                $handle_error->("vm $vmid - unable to parse value of '$key' - $@");
            } else {
                # legacy alias: 'cdrom' is stored as 'ide2'
                $key = 'ide2' if $key eq 'cdrom';
                my $fmt = $confdesc->{$key}->{format};
                if ($fmt && $fmt =~ /^pve-qm-(?:ide|scsi|virtio|sata)$/) {
                    # normalize the drive's file to a volume ID
                    my $v = parse_drive($key, $value);
                    if (my $volid = filename_to_volume_id($vmid, $v->{file}, $v->{media})) {
                        $v->{file} = $volid;
                        $value = print_drive($v);
                    } else {
                        $handle_error->("vm $vmid - unable to parse value of '$key'\n");
                        next;
                    }
                }

                $conf->{$key} = $value;
            }
        } else {
            $handle_error->("vm $vmid - unable to parse config: $line\n");
        }
    }

    $finish_description->();
    delete $res->{snapstate}; # just to be sure

    return $res;
}
2512
# Serialize a VM configuration hash into the raw config file format.
#
# Parameters:
#   $filename - not used by the code below
#   $conf     - configuration hash; it is normalized in place (legacy 'cdrom'
#               and 'smp' options are converted, values are replaced by their
#               type-checked form, 'unusedN' entries for volumes that are in
#               use again are dropped)
#
# Returns the raw config text: main section, followed by [PENDING],
# [special:cloudinit] and one section per snapshot (sorted by name) as far as
# they are present.
#
# Dies if a value fails check_type(), if 'cdrom' conflicts with 'ide2', if
# 'delete' is used outside of the pending section or if a snapshot is named
# 'pending'.
sub write_vm_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    if ($conf->{cdrom}) {
        die "option ide2 conflicts with cdrom\n" if $conf->{ide2};
        $conf->{ide2} = $conf->{cdrom};
        delete $conf->{cdrom};
    }

    # we do not use 'smp' any longer
    if ($conf->{sockets}) {
        delete $conf->{smp};
    } elsif ($conf->{smp}) {
        $conf->{sockets} = $conf->{smp};
        delete $conf->{cores};
        delete $conf->{smp};
    }

    # volumes referenced by drives outside of snapshots
    my $used_volids = {};

    # type-check all options of one section in place and record used volumes
    my $cleanup_config = sub {
        my ($cref, $pending, $snapname) = @_;

        foreach my $key (keys %$cref) {
            next if $key eq 'digest' || $key eq 'description' || $key eq 'snapshots' ||
                $key eq 'snapstate' || $key eq 'pending' || $key eq 'cloudinit';
            my $value = $cref->{$key};
            if ($key eq 'delete') {
                die "property 'delete' is only allowed in [PENDING]\n"
                    if !$pending;
                # fixme: check syntax?
                next;
            }
            eval { $value = check_type($key, $value); };
            die "unable to parse value of '$key' - $@" if $@;

            $cref->{$key} = $value;

            if (!$snapname && is_valid_drivename($key)) {
                my $drive = parse_drive($key, $value);
                $used_volids->{$drive->{file}} = 1 if $drive && $drive->{file};
            }
        }
    };

    $cleanup_config->($conf);

    $cleanup_config->($conf->{pending}, 1);

    foreach my $snapname (keys %{$conf->{snapshots}}) {
        die "internal error: snapshot name '$snapname' is forbidden" if lc($snapname) eq 'pending';
        $cleanup_config->($conf->{snapshots}->{$snapname}, undef, $snapname);
    }

    # remove 'unusedX' settings if we re-add a volume
    foreach my $key (keys %$conf) {
        my $value = $conf->{$key};
        if ($key =~ m/^unused/ && $used_volids->{$value}) {
            delete $conf->{$key};
        }
    }

    # render one section: description as leading comment lines, then all
    # options sorted by key
    my $generate_raw_config = sub {
        my ($section_conf, $pending) = @_;

        my $raw = '';

        # add description as comment to top of file
        if (defined(my $descr = $section_conf->{description})) {
            if ($descr) {
                foreach my $cl (split(/\n/, $descr)) {
                    $raw .= '#' . PVE::Tools::encode_text($cl) . "\n";
                }
            } else {
                # an empty description is only written out in the pending section
                $raw .= "#\n" if $pending;
            }
        }

        foreach my $key (sort keys %$section_conf) {
            next if $key =~ /^(digest|description|pending|cloudinit|snapshots)$/;
            $raw .= "$key: $section_conf->{$key}\n";
        }
        return $raw;
    };

    my $raw = $generate_raw_config->($conf);

    if (scalar(keys %{$conf->{pending}})){
        $raw .= "\n[PENDING]\n";
        $raw .= $generate_raw_config->($conf->{pending}, 1);
    }

    if (scalar(keys %{$conf->{cloudinit}}) && PVE::QemuConfig->has_cloudinit($conf)){
        $raw .= "\n[special:cloudinit]\n";
        $raw .= $generate_raw_config->($conf->{cloudinit});
    }

    foreach my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n";
        $raw .= $generate_raw_config->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
2619
# Collect the default value of every config option that defines one.
#
# Returns a hash reference (option name => default).
sub load_defaults {
    my %defaults;

    # we use static defaults from our JSON schema configuration
    for my $key (keys %$confdesc) {
        my $default = $confdesc->{$key}->{default};
        next if !defined($default);
        $defaults{$key} = $default;
    }

    return \%defaults;
}
2633
# List the QEMU guests located on the local node according to the cluster-wide
# VM list.
#
# Returns a hash reference of the form { $vmid => { exists => 1 }, ... }.
sub config_list {
    my $vmlist = PVE::Cluster::get_vmlist();
    my $res = {};
    return $res if !$vmlist || !$vmlist->{ids};

    my $ids = $vmlist->{ids};
    my $nodename = nodename();

    my @local_vmids = grep {
        my $info = $ids->{$_};
        $info->{node} && $info->{node} eq $nodename
            && $info->{type} && $info->{type} eq 'qemu'
    } keys %$ids;

    $res->{$_}->{exists} = 1 for @local_vmids;

    return $res;
}
2649
# test if VM uses local resources (to prevent migration)
#
# Parameters:
#   $conf  - VM configuration
#   $noerr - if true, do not die when local resources are found
#
# In list context returns three values:
#   - array ref of config keys that refer to local resources
#   - array ref of config keys that use a resource mapping
#   - hash ref: node name => array ref of mapped keys that have no mapping
#     entry for that node
# In scalar context only the first array ref is returned.
#
# Dies with "VM uses local resources" if any were found and $noerr is unset.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    my @loc_res = ();
    my $mapped_res = [];

    my $nodelist = PVE::Cluster::get_nodelist();
    my $pci_map = PVE::Mapping::PCI::config();
    my $usb_map = PVE::Mapping::USB::config();

    my $missing_mappings_by_node = { map { $_ => [] } @$nodelist };

    # record $key as missing on every node without an entry for mapping $id
    my $add_missing_mapping = sub {
        my ($type, $key, $id) = @_;
        for my $node (@$nodelist) {
            my $entry;
            if ($type eq 'pci') {
                $entry = PVE::Mapping::PCI::get_node_mapping($pci_map, $id, $node);
            } elsif ($type eq 'usb') {
                $entry = PVE::Mapping::USB::get_node_mapping($usb_map, $id, $node);
            }
            if (!scalar($entry->@*)) {
                push @{$missing_mappings_by_node->{$node}}, $key;
            }
        }
    };

    push @loc_res, "hostusb" if $conf->{hostusb}; # old syntax
    push @loc_res, "hostpci" if $conf->{hostpci}; # old syntax

    push @loc_res, "ivshmem" if $conf->{ivshmem};

    foreach my $k (keys %$conf) {
        if ($k =~ m/^usb/) {
            my $entry = parse_property_string('pve-qm-usb', $conf->{$k});
            # 'host' is not set for entries using 'mapping', so guard the
            # match to avoid an "uninitialized value" warning
            next if $entry->{host} && $entry->{host} =~ m/^spice$/i;
            if ($entry->{mapping}) {
                $add_missing_mapping->('usb', $k, $entry->{mapping});
                push @$mapped_res, $k;
            }
        }
        if ($k =~ m/^hostpci/) {
            my $entry = parse_property_string('pve-qm-hostpci', $conf->{$k});
            if ($entry->{mapping}) {
                $add_missing_mapping->('pci', $k, $entry->{mapping});
                push @$mapped_res, $k;
            }
        }
        # sockets are safe: they will be recreated on the target side post-migrate
        next if $k =~ m/^serial/ && ($conf->{$k} eq 'socket');
        push @loc_res, $k if $k =~ m/^(usb|hostpci|serial|parallel)\d+$/;
    }

    die "VM uses local resources\n" if scalar @loc_res && !$noerr;

    return wantarray ? (\@loc_res, $mapped_res, $missing_mappings_by_node) : \@loc_res;
}
2708
# check if used storages are available on all nodes (used by migrate)
#
# For every volume of $conf that has a storage ID, the storage must be
# enabled locally and on $node, and it must allow the volume's content type.
# The called storage helpers or the content type check die otherwise.
sub check_storage_availability {
    my ($storecfg, $conf, $node) = @_;

    my $check_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($sid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        # check if storage is available on both nodes
        my $scfg = PVE::Storage::storage_check_enabled($storecfg, $sid);
        PVE::Storage::storage_check_enabled($storecfg, $sid, $node);

        my ($vtype) = PVE::Storage::parse_volname($storecfg, $volid);

        if (!$scfg->{content}->{$vtype}) {
            die "$volid: content type '$vtype' is not available on storage '$sid'\n";
        }
    };

    PVE::QemuConfig->foreach_volume($conf, $check_volume);
}
2732
# list nodes where all VM images are available (used by has_feature API)
#
# Starts with all cluster nodes and removes nodes for each volume of $conf:
# a disabled storage removes all nodes, a storage restricted to certain nodes
# removes the others, and a non-shared storage removes all but the local node.
# Returns a hash reference (node name => 1).
sub shared_nodes {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => 1 } @$nodelist };
    my $nodename = nodename();

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        if ($scfg->{disable}) {
            $nodehash = {};
            return;
        }

        my @unavailable;
        if (my $avail = $scfg->{nodes}) {
            @unavailable = grep { !$avail->{$_} } keys %$nodehash;
        } elsif (!$scfg->{shared}) {
            @unavailable = grep { $_ ne $nodename } keys %$nodehash;
        }
        delete @{$nodehash}{@unavailable};
    });

    return $nodehash;
}
2766
# Determine, per cluster node, which storages used by the volumes of $conf
# are not available there (storage disabled, or restricted to other nodes).
#
# Returns a hash reference keyed by node name. Each value is a hash reference
# that carries a sorted 'unavailable_storages' array if any storage is
# unavailable on that node, and is empty otherwise.
sub check_local_storage_availability {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => {} } @$nodelist };

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        my @affected;
        if ($scfg->{disable}) {
            @affected = keys %$nodehash;
        } elsif (my $avail = $scfg->{nodes}) {
            @affected = grep { !$avail->{$_} } keys %$nodehash;
        }

        $nodehash->{$_}->{unavailable_storages}->{$storeid} = 1 for @affected;
    });

    # turn the per-node lookup hashes into sorted lists
    for my $info (values %$nodehash) {
        my $unavail = $info->{unavailable_storages};
        next if !$unavail;
        $info->{unavailable_storages} = [ sort keys %$unavail ];
    }

    return $nodehash;
}
2805
# Compat only, use assert_config_exists_on_node and vm_running_locally where possible
#
# Returns the result of vm_running_locally() for $vmid. Unless $nocheck is
# set, first asserts that the config of $vmid exists on $node.
sub check_running {
    my ($vmid, $nocheck, $node) = @_;

    # $nocheck is set when called during a migration, in which case the config
    # file might still or already reside on the *other* node
    # - because rename has already happened, and current node is source
    # - because rename hasn't happened yet, and current node is target
    # - because rename has happened, current node is target, but hasn't yet
    #   processed it yet
    if (!$nocheck) {
        PVE::QemuConfig::assert_config_exists_on_node($vmid, $node);
    }

    return PVE::QemuServer::Helpers::vm_running_locally($vmid);
}
2819
# List the guests known from config_list() and add the 'pid' of those that
# have a PID file in the runtime directory and pass check_running().
#
# If the runtime directory cannot be opened, the plain config list is returned.
sub vzlist {
    my $vzlist = config_list();

    my $run_dir = IO::Dir->new($PVE::QemuServer::Helpers::var_run_tmpdir);
    return $vzlist if !$run_dir;

    while (defined(my $entry = $run_dir->read)) {
        my ($vmid) = $entry =~ m/^(\d+)\.pid$/;
        next if !defined($vmid);
        next if !defined($vzlist->{$vmid});

        my $pid = check_running($vmid);
        $vzlist->{$vmid}->{pid} = $pid if $pid;
    }

    return $vzlist;
}
2837
# JSON schema describing properties of a vmstatus() result entry.
our $vmstatus_return_properties = {
    # identification
    vmid => get_standard_option('pve-vmid'),
    name => {
        type => 'string',
        description => "VM name.",
        optional => 1,
    },
    tags => {
        type => 'string',
        description => "The current configured tags, if any",
        optional => 1,
    },
    lock => {
        type => 'string',
        description => "The current config lock, if any.",
        optional => 1,
    },

    # process state
    status => {
        type => 'string',
        description => "QEMU process status.",
        enum => ['stopped', 'running'],
    },
    qmpstatus => {
        type => 'string',
        description => "VM run state from the 'query-status' QMP monitor command.",
        optional => 1,
    },
    pid => {
        type => 'integer',
        description => "PID of running qemu process.",
        optional => 1,
    },
    uptime => {
        type => 'integer',
        description => "Uptime.",
        optional => 1,
        renderer => 'duration',
    },
    'running-machine' => {
        type => 'string',
        description => "The currently running machine type (if running).",
        optional => 1,
    },
    'running-qemu' => {
        type => 'string',
        description => "The currently running QEMU version (if running).",
        optional => 1,
    },

    # configured resources
    cpus => {
        type => 'number',
        description => "Maximum usable CPUs.",
        optional => 1,
    },
    maxmem => {
        type => 'integer',
        description => "Maximum memory in bytes.",
        optional => 1,
        renderer => 'bytes',
    },
    maxdisk => {
        type => 'integer',
        description => "Root disk size in bytes.",
        optional => 1,
        renderer => 'bytes',
    },
};
2904
# CPU accounting sample (time, used ticks, last computed cpu value) per PID,
# kept between vmstatus() calls to compute CPU usage from the difference.
my $last_proc_pid_stat;

# get VM status information
# This must be fast and should not block ($full == false)
# We only query KVM using QMP if $full == true (this can be slow)
#
# Parameters:
#   $opt_vmid - if set, only report this VM
#   $full     - if true, additionally query running VMs via QMP
#
# Returns a hash reference: vmid => status hash.
sub vmstatus {
    my ($opt_vmid, $full) = @_;

    my $res = {};

    my $storecfg = PVE::Storage::config();

    my $list = vzlist();
    my $defaults = load_defaults();

    my ($uptime) = PVE::ProcFSTools::read_proc_uptime(1);

    my $cpucount = $cpuinfo->{cpus} || 1;
    my $mib = 1024 * 1024;

    # true if a specific VM was requested and $vmid is a different one
    my $not_requested = sub {
        my ($vmid) = @_;
        return $opt_vmid && ($vmid ne $opt_vmid);
    };

    # pass 1: static information from the configuration
    for my $vmid (keys %$list) {
        next if $not_requested->($vmid);

        my $conf = PVE::QemuConfig->load_config($vmid);
        my $pid = $list->{$vmid}->{pid};

        my $d = { vmid => int($vmid) };
        $d->{pid} = int($pid) if $pid;

        # fixme: better status?
        $d->{status} = $pid ? 'running' : 'stopped';

        my $size = PVE::QemuServer::Drive::bootdisk_size($storecfg, $conf);
        $d->{disk} = 0; # no usage info available
        $d->{maxdisk} = defined($size) ? $size : 0;

        my $sockets = $conf->{sockets} || $defaults->{sockets};
        my $cores = $conf->{cores} || $defaults->{cores};
        my $cpus = $sockets * $cores;
        $cpus = $cpucount if $cpus > $cpucount;
        $cpus = $conf->{vcpus} if $conf->{vcpus};
        $d->{cpus} = $cpus;

        $d->{name} = $conf->{name} || "VM $vmid";
        $d->{maxmem} = ($conf->{memory} || $defaults->{memory}) * $mib;

        if (my $balloon = $conf->{balloon}) {
            $d->{balloon_min} = $balloon * $mib;
            $d->{shares} = defined($conf->{shares}) ? $conf->{shares} : $defaults->{shares};
        }

        # dynamic values start at zero and are filled in below for running VMs
        $d->{$_} = 0 for qw(uptime cpu mem netout netin diskread diskwrite);

        $d->{template} = 1 if PVE::QemuConfig->is_template($conf);

        $d->{serial} = 1 if conf_has_serial($conf);
        $d->{lock} = $conf->{lock} if $conf->{lock};
        $d->{tags} = $conf->{tags} if defined($conf->{tags});

        $res->{$vmid} = $d;
    }

    # pass 2: network counters of the tap devices
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    for my $dev (keys %$netdev) {
        my ($vmid) = $dev =~ m/^tap([1-9]\d*)i/;
        next if !defined($vmid);

        my $d = $res->{$vmid};
        next if !$d;

        # note the swap: the device's receive counter is accounted as the
        # guest's netout and its transmit counter as netin
        my $counters = $netdev->{$dev};
        $d->{netout} += $counters->{receive};
        $d->{netin} += $counters->{transmit};

        if ($full) {
            $d->{nics}->{$dev}->{netout} = int($counters->{receive});
            $d->{nics}->{$dev}->{netin} = int($counters->{transmit});
        }
    }

    # pass 3: uptime, memory and CPU usage from /proc
    my $ctime = gettimeofday;

    for my $vmid (keys %$list) {
        my $d = $res->{$vmid};
        my $pid = $d->{pid};
        next if !$pid;

        my $pstat = PVE::ProcFSTools::read_proc_pid_stat($pid);
        next if !$pstat; # not running

        my $used = $pstat->{utime} + $pstat->{stime};

        $d->{uptime} = int(($uptime - $pstat->{starttime})/$cpuinfo->{user_hz});

        if ($pstat->{vsize}) {
            $d->{mem} = int(($pstat->{rss}/$pstat->{vsize})*$d->{maxmem});
        }

        my $old = $last_proc_pid_stat->{$pid};
        if (!$old) {
            # first sample for this PID, no usage can be computed yet
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($ctime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => $d->{cpu},
            };
        } else {
            # interval too short, reuse the previously computed value
            $d->{cpu} = $old->{cpu};
        }
    }

    return $res if !$full;

    # full mode: query the running VMs via QMP
    my $qmpclient = PVE::QMPClient->new();

    my $ballooncb = sub {
        my ($vmid, $resp) = @_;

        my $info = $resp->{'return'};
        return if !$info->{max_mem};

        my $d = $res->{$vmid};

        # use memory assigned to VM
        $d->{maxmem} = $info->{max_mem};
        $d->{balloon} = $info->{actual};

        if (defined($info->{total_mem}) && defined($info->{free_mem})) {
            $d->{mem} = $info->{total_mem} - $info->{free_mem};
            $d->{freemem} = $info->{free_mem};
        }

        $d->{ballooninfo} = $info;
    };

    my $blockstatscb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} || [];

        my ($read_total, $write_total) = (0, 0);
        for my $blockstat (@$data) {
            my $stats = $blockstat->{stats};
            $read_total = $read_total + $stats->{rd_bytes};
            $write_total = $write_total + $stats->{wr_bytes};

            $blockstat->{device} =~ s/drive-//;
            $res->{$vmid}->{blockstat}->{$blockstat->{device}} = $stats;
        }
        $res->{$vmid}->{diskread} = $read_total;
        $res->{$vmid}->{diskwrite} = $write_total;
    };

    my $machinecb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} || [];

        $res->{$vmid}->{'running-machine'} =
            PVE::QemuServer::Machine::current_from_query_machines($data);
    };

    my $versioncb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} // {};

        my $version = 'unknown';
        if (my $v = $data->{qemu}) {
            $version = $v->{major} . "." . $v->{minor} . "." . $v->{micro};
        }

        $res->{$vmid}->{'running-qemu'} = $version;
    };

    my $statuscb = sub {
        my ($vmid, $resp) = @_;

        $qmpclient->queue_cmd($vmid, $blockstatscb, 'query-blockstats');
        $qmpclient->queue_cmd($vmid, $machinecb, 'query-machines');
        $qmpclient->queue_cmd($vmid, $versioncb, 'query-version');
        # this fails if balloon driver is not loaded, so this must be
        # the last command (following commands are aborted if this fails).
        $qmpclient->queue_cmd($vmid, $ballooncb, 'query-balloon');

        my $status = $resp->{'return'}->{status};
        if (!defined($status)) {
            warn "unable to get VM status\n";
            return;
        }

        $res->{$vmid}->{qmpstatus} = $status;
    };

    for my $vmid (keys %$list) {
        next if $not_requested->($vmid);
        next if !$res->{$vmid}->{pid}; # not running
        $qmpclient->queue_cmd($vmid, $statuscb, 'query-status');
    }

    $qmpclient->queue_execute(undef, 2);

    for my $vmid (keys %$list) {
        next if $not_requested->($vmid);
        next if !$res->{$vmid}->{pid}; #not running

        # we can't use the $qmpclient since it might have already aborted on
        # 'query-balloon', but this might also fail for older versions...
        my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };
        $res->{$vmid}->{'proxmox-support'} = $qemu_support // {};
    }

    # fall back to the process status where no QMP status was obtained
    for my $vmid (keys %$list) {
        next if $not_requested->($vmid);
        my $d = $res->{$vmid};
        $d->{qmpstatus} = $d->{status} if !$d->{qmpstatus};
    }

    return $res;
}
3145
# Returns 1 if at least one of the 'serialN' options (for N below $MAX_SERIAL_PORTS) is set
# to a true value in $conf, and 0 otherwise.
sub conf_has_serial {
    my ($conf) = @_;

    foreach my $port (0 .. $MAX_SERIAL_PORTS - 1) {
        return 1 if $conf->{"serial$port"};
    }

    return 0;
}
3157
# Looks up the 'audio$id' option ($id defaults to 0) in $conf and parses it.
#
# Returns nothing if the option is not defined. Otherwise returns a hash reference with:
#   dev        - the configured device model
#   dev_id     - ID for the emulated audio device
#   backend    - the configured driver, 'spice' if none is set
#   backend_id - ID for the audio backend
sub conf_has_audio {
    my ($conf, $id) = @_;

    $id //= 0;

    my $raw_audio = $conf->{"audio$id"};
    if (!defined($raw_audio)) {
        return;
    }

    my $props = parse_property_string($audio_fmt, $raw_audio);
    my $backend = $props->{driver} // 'spice';

    my %audio = (
        dev => $props->{device},
        dev_id => "audiodev$id",
        backend => $backend,
        backend_id => "$backend-backend${id}",
    );

    return \%audio;
}
3175
# Builds the QEMU command line arguments for a VM's audio device.
#
# Parameters:
#   $audio           - hash reference with the keys dev, dev_id, backend and backend_id
#                      (the shape returned by conf_has_audio)
#   $audiopciaddr    - string appended to the sound card's '-device' value after its ID
#   $machine_version - machine version; starting with 4.2 the devices are tied to the
#                      backend through an explicit 'audiodev=' property
#
# Returns an array reference holding the '-device' arguments followed by the '-audiodev'
# argument. Dies for device models that are not handled here.
sub audio_devs {
    my ($audio, $audiopciaddr, $machine_version) = @_;

    my $devs = [];

    my $id = $audio->{dev_id};
    my $audiodev = "";
    if (min_version($machine_version, 4, 2)) {
        $audiodev = ",audiodev=$audio->{backend_id}";
    }

    if ($audio->{dev} eq 'AC97') {
        push @$devs, '-device', "AC97,id=${id}${audiopciaddr}$audiodev";
    } elsif ($audio->{dev} =~ /intel\-hda$/) {
        # the HDA controller itself gets the PCI address, the codecs sit on its bus
        push @$devs, '-device', "$audio->{dev},id=${id}${audiopciaddr}";
        push @$devs, '-device', "hda-micro,id=${id}-codec0,bus=${id}.0,cad=0$audiodev";
        push @$devs, '-device', "hda-duplex,id=${id}-codec1,bus=${id}.0,cad=1$audiodev";
    } else {
        # no trailing newline on purpose: this is a programming error, keep file and line
        die "unknown audio device '$audio->{dev}', implement me!";
    }

    push @$devs, '-audiodev', "$audio->{backend},id=$audio->{backend_id}";

    return $devs;
}
3201
# Returns a hash reference with the runtime file paths used by the swtpm instance of the
# given VM: 'socket' (control socket) and 'pid' (PID file).
sub get_tpm_paths {
    my ($vmid) = @_;

    my $socket_path = "/var/run/qemu-server/$vmid.swtpm";

    return {
        socket => $socket_path,
        pid => "$socket_path.pid",
    };
}
3209
# Appends the QEMU arguments for an emulated TPM to @$devices: a chardev connected to the
# VM's swtpm socket, the TPM backend using that chardev and the 'tpm-tis' frontend device.
# Does nothing if the VM config has no 'tpmstate0' entry.
sub add_tpm_device {
    my ($vmid, $devices, $conf) = @_;

    return if !$conf->{tpmstate0};

    my $socket = get_tpm_paths($vmid)->{socket};

    push @$devices,
        "-chardev", "socket,id=tpmchar,path=$socket",
        "-tpmdev", "emulator,id=tpmdev,chardev=tpmchar",
        "-device", "tpm-tis,tpmdev=tpmdev";
}
3221
# Starts the swtpm daemon that emulates the TPM backed by a VM's 'tpmstate0' drive.
#
# Parameters:
#   $storecfg  - storage configuration, used to map storage-managed state volumes
#   $vmid      - VM ID; determines the control socket, PID file and log file names
#   $tpmdrive  - 'tpmstate0' property string; nothing is started if this is not set
#   $migration - if true, swtpm_setup is skipped, as the state is received from the remote
#
# Returns the (untainted) PID read from swtpm's PID file so that the caller can kill the
# daemon on error, or undef if the PID file does not contain a number. Returns nothing if
# no TPM state drive was passed.
# Dies if the PID file was not created in time; errors raised by run_command propagate.
sub start_swtpm {
    my ($storecfg, $vmid, $tpmdrive, $migration) = @_;

    return if !$tpmdrive;

    my $state;
    my $tpm = parse_drive("tpmstate0", $tpmdrive);
    my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
    if ($storeid) {
        $state = PVE::Storage::map_volume($storecfg, $tpm->{file});
    } else {
        $state = $tpm->{file};
    }

    my $paths = get_tpm_paths($vmid);

    # guard against an unset version, so that the comparison cannot warn
    my $is_tpm2 = defined($tpm->{version}) && $tpm->{version} eq 'v2.0';

    # during migration, we will get state from remote
    if (!$migration) {
        # run swtpm_setup to create a new TPM state if it doesn't exist yet
        my $setup_cmd = [
            "swtpm_setup",
            "--tpmstate",
            "file://$state",
            "--createek",
            "--create-ek-cert",
            "--create-platform-cert",
            "--lock-nvram",
            "--config",
            "/etc/swtpm_setup.conf", # do not use XDG configs
            "--runas",
            "0", # force creation as root, error if not possible
            "--not-overwrite", # ignore existing state, do not modify
        ];

        push @$setup_cmd, "--tpm2" if $is_tpm2;
        # TPM 2.0 supports ECC crypto, use if possible
        push @$setup_cmd, "--ecc" if $is_tpm2;

        # use the line handed to the callback instead of whatever capture group happens to
        # be visible from the caller's last regex match
        run_command($setup_cmd, outfunc => sub {
            my ($line) = @_;
            print "swtpm_setup: $line\n";
        });
    }

    # Used to distinguish different invocations in the log.
    my $log_prefix = "[id=" . int(time()) . "] ";

    my $emulator_cmd = [
        "swtpm",
        "socket",
        "--tpmstate",
        "backend-uri=file://$state,mode=0600",
        "--ctrl",
        "type=unixio,path=$paths->{socket},mode=0600",
        "--pid",
        "file=$paths->{pid}",
        "--terminate", # terminate on QEMU disconnect
        "--daemon",
        "--log",
        "file=/run/qemu-server/$vmid-swtpm.log,level=1,prefix=$log_prefix",
    ];
    push @$emulator_cmd, "--tpm2" if $is_tpm2;
    # like for swtpm_setup above, print the passed line and terminate it with a newline
    run_command($emulator_cmd, outfunc => sub {
        my ($line) = @_;
        print "$line\n";
    });

    my $tries = 100; # swtpm may take a bit to start before daemonizing, wait up to 5s for pid
    while (! -e $paths->{pid}) {
        die "failed to start swtpm: pid file '$paths->{pid}' wasn't created.\n" if --$tries == 0;
        usleep(50_000);
    }

    # return untainted PID of swtpm daemon so it can be killed on error; capturing into a
    # lexical avoids handing out a stale $1 when the file holds no number (yet)
    my $first_line = file_read_firstline($paths->{pid}) // '';
    my ($pid) = $first_line =~ m/(\d+)/;
    return $pid;
}
3296
# Parses the given 'vga' property string and tells whether it selects a SPICE (qxl) display.
#
# Returns 0 for any non-qxl type. For 'qxl' it returns 1, and for 'qxl2', 'qxl3' or 'qxl4'
# the trailing digit.
sub vga_conf_has_spice {
    my ($vga) = @_;

    my $parsed = parse_vga($vga);
    my $type = $parsed->{type};

    if ($type && $type =~ m/^qxl([234])?$/) {
        return $1 || 1;
    }

    return 0;
}
3306
# Returns true if $arch is the architecture of the host this code runs on.
sub is_native($) {
    my ($arch) = @_;

    my $host_arch = get_host_arch();

    return $host_arch eq $arch;
}
3311
# Returns the architecture configured for the VM, falling back to the host architecture if
# the config does not define one.
sub get_vm_arch {
    my ($conf) = @_;

    return $conf->{arch} if defined($conf->{arch});

    return get_host_arch();
}
3316
# Machine type to use per architecture when none is configured explicitly.
my $default_machines = {
    aarch64 => 'virt',
    x86_64 => 'pc',
};
3321
# Returns the 'major.minor' prefix of a QEMU version string, e.g. '7.2' for '7.2.0'.
#
# $kvmversion is optional; if it is not defined, the version reported by kvm_user_version()
# is used. Returns undef if the version string does not start with 'major.minor'.
sub get_installed_machine_version {
    my ($kvmversion) = @_;

    $kvmversion = kvm_user_version() if !defined($kvmversion);

    # capture into a lexical: after a failed match, $1 would still hold the capture of some
    # earlier, unrelated match
    my ($machine_version) = ($kvmversion // '') =~ m/^(\d+\.\d+)/;

    return $machine_version;
}
3328
# Turns an unversioned machine type into a version-pinned one.
#
# The pinned version is $base_version if it is defined and can be run according to
# PVE::QemuServer::Machine::can_run_pve_machine_version, otherwise the installed machine
# version. An unset machine type is treated like 'pc'. Machine types other than 'pc', 'q35'
# and 'virt' trigger a warning and are returned unmodified.
sub windows_get_pinned_machine_version {
    my ($machine, $base_version, $kvmversion) = @_;

    my $base_usable = defined($base_version)
        && PVE::QemuServer::Machine::can_run_pve_machine_version($base_version, $kvmversion);

    my $pin_version = $base_usable
        ? $base_version
        : get_installed_machine_version($kvmversion);

    my %versioned_prefix = (
        pc => 'pc-i440fx',
        q35 => 'pc-q35',
        virt => 'virt',
    );

    my $prefix = $versioned_prefix{ $machine || 'pc' };
    return "$prefix-$pin_version" if defined($prefix);

    warn "unknown machine type '$machine', not touching that!\n";

    return $machine;
}
3350
# Determines the machine type to use for a VM.
#
# Parameters:
#   $conf            - VM config; 'machine' and 'ostype' are read
#   $forcemachine    - if set, takes precedence over the configured machine type
#   $arch            - architecture used to pick a default machine, 'x86_64' if undefined
#   $add_pve_version - if true, make sure the result carries a '+pveX' suffix
#   $kvmversion      - QEMU version, queried via kvm_user_version() if needed and undefined
#
# Returns the machine type string.
sub get_vm_machine {
    my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_;

    my $machine = $forcemachine || $conf->{machine};

    my $is_unversioned = !$machine || $machine =~ m/^(?:pc|q35|virt)$/;
    if ($is_unversioned) {
        $kvmversion //= kvm_user_version();

        # we must pin Windows VMs without a specific version to 5.1, as 5.2 fixed a bug in ACPI
        # layout which confuses windows quite a bit and may result in various regressions..
        # see: https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg08484.html
        $machine = windows_get_pinned_machine_version($machine, '5.1', $kvmversion)
            if windows_version($conf->{ostype});

        $arch //= 'x86_64';
        $machine ||= $default_machines->{$arch};

        $machine .= '+pve' . PVE::QemuServer::Machine::get_pve_version($kvmversion)
            if $add_pve_version;
    }

    # nothing left to do if no pve-version is wanted or one is already present
    return $machine if !$add_pve_version;
    return $machine if $machine =~ m/\+pve\d+?(?:\.pxe)?$/;

    # for version-pinned machines that do not include a pve-version (e.g.
    # pc-q35-4.1), we assume 0 to keep them stable in case we bump
    if ($machine =~ m/^(.*?)\.pxe$/) {
        # the '.pxe' suffix has to stay at the very end
        return "$1+pve0.pxe";
    }

    return "$machine+pve0";
}
3385
# Selects the OVMF firmware code and vars images to use.
#
# Parameters:
#   $arch    - architecture to look up images for; dies if none are known
#   $efidisk - parsed EFI disk (may be undef); 'efitype' and 'pre-enrolled-keys' are read
#   $smm     - selects between the '4m' and '4m-no-smm' image types
#
# Unless the architecture is aarch64, an EFI disk of type '4m' selects the 4m images, with
# the '-ms' variant if pre-enrolled keys are requested. Everything else uses the 'default'
# images.
#
# Returns the list ($ovmf_code, $ovmf_vars); dies if one of the two files does not exist.
sub get_ovmf_files($$$) {
    my ($arch, $efidisk, $smm) = @_;

    my $types = $OVMF->{$arch};
    die "no OVMF images known for architecture '$arch'\n" if !$types;

    my $wants_4m = $arch ne "aarch64"
        && defined($efidisk->{efitype})
        && $efidisk->{efitype} eq '4m';

    my $type = 'default';
    if ($wants_4m) {
        my $ms_suffix = $efidisk->{'pre-enrolled-keys'} ? '-ms' : '';
        $type = ($smm ? "4m" : "4m-no-smm") . $ms_suffix;
    }

    my ($ovmf_code, $ovmf_vars) = $types->{$type}->@*;

    if (! -f $ovmf_code) {
        die "EFI base image '$ovmf_code' not found\n";
    }
    if (! -f $ovmf_vars) {
        die "EFI vars image '$ovmf_vars' not found\n";
    }

    return ($ovmf_code, $ovmf_vars);
}
3404
# Emulator binaries to use when the requested architecture is not the host's own.
my $Arch2Qemu = {
    x86_64 => '/usr/bin/qemu-system-x86_64',
    aarch64 => '/usr/bin/qemu-system-aarch64',
};

# Returns the binary to run for the given architecture: '/usr/bin/kvm' for the native one,
# the matching qemu-system binary otherwise. Dies for architectures without a known binary.
sub get_command_for_arch($) {
    my ($arch) = @_;

    if (is_native($arch)) {
        return '/usr/bin/kvm';
    }

    my $cmd = $Arch2Qemu->{$arch};
    die "don't know how to emulate architecture '$arch'\n" if !$cmd;

    return $cmd;
}
3417
# Queries which CPU flags QEMU reports as supported on this host, separately for KVM and
# TCG, as the two support different flags.
#
# To turn the results of query_supported_cpu_flags and query_understood_cpu_flags into
# flags usable on a QEMU command line (-cpu element), array_intersect the result of
# query_supported_ with the one of query_understood_ first. This is necessary because:
#
# a) query_understood_ returns flags the host cannot use and
# b) query_supported_ (or rather the QMP call behind it) does not actually return CPU
#    flags, but CPU settings - most of which are flags. Those settings (and, curiously,
#    some flags) cannot be specified as a "-cpu" argument.
#
# query_supported_ needs to start up to 2 temporary VMs and is therefore rather expensive.
# If you need its result, you can get it much cheaper from pmxcfs using
# PVE::Cluster::get_node_kv('cpuflags-$accel') with $accel being 'kvm' or 'tcg'.
#
# pvestatd calls this function on startup and whenever the QEMU/KVM version changes,
# automatically populating pmxcfs.
#
# Returns: { kvm => [ flagX, flagY, ... ], tcg => [ flag1, flag2, ... ] }
# The 'kvm' entry is only set if KVM is available on the host.
sub query_supported_cpu_flags {
    my ($arch) = @_;

    $arch //= get_host_arch();
    my $default_machine = $default_machines->{$arch};

    my $flags = {};

    # FIXME: Once this is merged, the code below should work for ARM as well:
    # https://lists.nongnu.org/archive/html/qemu-devel/2019-06/msg04947.html
    if ($arch eq "aarch64") {
        die "QEMU/KVM cannot detect CPU flags on ARM (aarch64)\n";
    }

    my $kvm_supported = defined(kvm_version());
    my $qemu_cmd = get_command_for_arch($arch);
    my $fakevmid = -1;
    my $pidfile = PVE::QemuServer::Helpers::pidfile_name($fakevmid);

    # Starts a temporary (frozen) VM with vmid -1, asks it via QMP for the expansion of the
    # 'host' CPU model and returns the sorted list of enabled properties.
    my $query_flags_with_accel = sub {
        my ($use_kvm) = @_;

        my @qemu_cmdline = (
            $qemu_cmd,
            '-machine', $default_machine,
            '-display', 'none',
            '-chardev', "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server=on,wait=off",
            '-mon', 'chardev=qmp,mode=control',
            '-pidfile', $pidfile,
            '-S', '-daemonize',
        );
        push @qemu_cmdline, '-accel', 'tcg' if !$use_kvm;

        my $exit_code = run_command(\@qemu_cmdline, noerr => 1, quiet => 0);
        die "QEMU flag querying VM exited with code " . $exit_code if $exit_code;

        my %supported;
        eval {
            my $expansion = mon_cmd(
                $fakevmid,
                'query-cpu-model-expansion',
                type => 'full',
                model => { name => 'host' },
            );

            my $props = $expansion->{model}->{props};
            for my $name (keys %$props) {
                next if $props->{$name} ne '1';
                # QEMU returns some flags multiple times, with '_', '.' or '-'
                # (e.g. lahf_lm and lahf-lm; sse4.2, sse4-2 and sse4_2; ...).
                # We only keep those with underscores, to match /proc/cpuinfo
                (my $flag = $name) =~ s/\.|-/_/g;
                $supported{$flag} = 1;
            }
        };
        my $err = $@;

        # force stop with 10 sec timeout and 'nocheck', always stop, even if QMP failed
        vm_stop(undef, $fakevmid, 1, 1, 10, 0, 1);

        die $err if $err;

        return [ sort keys %supported ];
    };

    # We need to query QEMU twice, since KVM and TCG have different supported flags
    my $run_queries = sub {
        $flags->{tcg} = eval { $query_flags_with_accel->(0) };
        warn "warning: failed querying supported tcg flags: $@\n" if $@;

        return if !$kvm_supported;

        $flags->{kvm} = eval { $query_flags_with_accel->(1) };
        warn "warning: failed querying supported kvm flags: $@\n" if $@;
    };
    PVE::QemuConfig->lock_config($fakevmid, $run_queries);

    return $flags;
}
3519
# The CPU flags understood by QEMU are written to a file when 'pve-qemu' gets compiled.
my $understood_cpu_flag_dir = "/usr/share/kvm";

# Reads the whitespace-separated list of CPU flags QEMU understands for the host
# architecture and returns it as an array reference. Dies if the flag file does not exist.
sub query_understood_cpu_flags {
    my $arch = get_host_arch();
    my $filepath = "$understood_cpu_flag_dir/recognized-CPUID-flags-$arch";

    if (! -e $filepath) {
        die "Cannot query understood QEMU CPU flags for architecture: $arch (file not found)\n";
    }

    my $raw = file_get_contents($filepath);

    # splitting on ' ' ignores leading and trailing whitespace and splits on whitespace runs
    my @flags = split(' ', $raw);

    return \@flags;
}
3535
# SMM is not off by default anymore since pve-qemu commit
# 277d33454f77ec1d1e0bc04e37621e4dd2424b67, but smm=off seems to be required when SeaBIOS
# is used together with a serial display. This returns true for SeaBIOS (also when no BIOS
# is configured) combined with a 'serialN' or 'none' display type.
my sub should_disable_smm {
    my ($conf, $vga, $machine) = @_;

    # there is no smm flag that could be disabled for 'virt' machines
    return if $machine =~ m/^virt/;

    my $uses_seabios = !defined($conf->{bios}) || $conf->{bios} eq 'seabios';

    return $uses_seabios && $vga->{type} && $vga->{type} =~ m/^(serial\d+|none)$/;
}
3546
# Builds the two pflash '-drive' values needed to boot with OVMF.
#
# Parameters:
#   $conf          - VM config; 'efidisk0' is read
#   $storecfg      - storage configuration, used to resolve storage-managed EFI disks
#   $vmid          - VM ID, used to name the temporary vars file if no EFI disk exists
#   $arch, $q35    - passed on to get_ovmf_files to select the firmware images
#   $version_guard - callback taking (major, minor, pve); the 'size' option is only added
#                    if it returns true for (4, 1, 2)
#
# Returns the list (code drive string, vars drive string). Dies if the EFI disk is not
# managed by a storage and has no format set.
my sub print_ovmf_drive_commandlines {
    my ($conf, $storecfg, $vmid, $arch, $q35, $version_guard) = @_;

    my $efidisk = $conf->{efidisk0} ? parse_drive('efidisk0', $conf->{efidisk0}) : undef;

    my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch, $efidisk, $q35);
    my $code_drive_str = "if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code";

    my $var_drive_str = "if=pflash,unit=1,id=drive-efidisk0";

    if (!$efidisk) {
        # without an EFI disk, fall back to a copy of the vars template
        log_warn("no efidisk configured! Using temporary efivars disk.");
        my $tmp_vars = "/tmp/$vmid-ovmf.fd";
        PVE::Tools::file_copy($ovmf_vars, $tmp_vars, -s $ovmf_vars);
        $var_drive_str .= ",format=raw,file=$tmp_vars";
        $var_drive_str .= ",size=" . (-s $ovmf_vars) if $version_guard->(4, 1, 2);

        return ($code_drive_str, $var_drive_str);
    }

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($efidisk->{file}, 1);
    my ($path, $format) = $efidisk->@{'file', 'format'};

    die "efidisk format must be specified\n" if !$storeid && !defined($format);

    if ($storeid) {
        $path = PVE::Storage::path($storecfg, $efidisk->{file});
        if (!defined($format)) {
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $format = qemu_img_format($scfg, $volname);
        }
    }

    # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
    if ($path =~ m/^rbd:/) {
        $var_drive_str .= ',cache=writeback';
        $path .= ':rbd_cache_policy=writeback'; # avoid write-around, we *need* to cache writes too
    }
    $var_drive_str .= ",format=$format,file=$path";

    if ($format eq 'raw' && $version_guard->(4, 1, 2)) {
        $var_drive_str .= ",size=" . (-s $ovmf_vars);
    }
    if (drive_is_read_only($conf, $efidisk)) {
        $var_drive_str .= ',readonly=on';
    }

    return ($code_drive_str, $var_drive_str);
}
3586
3587 sub config_to_command {
3588 my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu,
3589 $pbs_backing) = @_;
3590
3591 my ($globalFlags, $machineFlags, $rtcFlags) = ([], [], []);
3592 my $devices = [];
3593 my $bridges = {};
3594 my $ostype = $conf->{ostype};
3595 my $winversion = windows_version($ostype);
3596 my $kvm = $conf->{kvm};
3597 my $nodename = nodename();
3598
3599 my $arch = get_vm_arch($conf);
3600 my $kvm_binary = get_command_for_arch($arch);
3601 my $kvmver = kvm_user_version($kvm_binary);
3602
3603 if (!$kvmver || $kvmver !~ m/^(\d+)\.(\d+)/ || $1 < 3) {
3604 $kvmver //= "undefined";
3605 die "Detected old QEMU binary ('$kvmver', at least 3.0 is required)\n";
3606 }
3607
3608 my $add_pve_version = min_version($kvmver, 4, 1);
3609
3610 my $machine_type = get_vm_machine($conf, $forcemachine, $arch, $add_pve_version);
3611 my $machine_version = extract_version($machine_type, $kvmver);
3612 $kvm //= 1 if is_native($arch);
3613
3614 $machine_version =~ m/(\d+)\.(\d+)/;
3615 my ($machine_major, $machine_minor) = ($1, $2);
3616
3617 if ($kvmver =~ m/^\d+\.\d+\.(\d+)/ && $1 >= 90) {
3618 warn "warning: Installed QEMU version ($kvmver) is a release candidate, ignoring version checks\n";
3619 } elsif (!min_version($kvmver, $machine_major, $machine_minor)) {
3620 die "Installed QEMU version '$kvmver' is too old to run machine type '$machine_type',"
3621 ." please upgrade node '$nodename'\n"
3622 } elsif (!PVE::QemuServer::Machine::can_run_pve_machine_version($machine_version, $kvmver)) {
3623 my $max_pve_version = PVE::QemuServer::Machine::get_pve_version($machine_version);
3624 die "Installed qemu-server (max feature level for $machine_major.$machine_minor is"
3625 ." pve$max_pve_version) is too old to run machine type '$machine_type', please upgrade"
3626 ." node '$nodename'\n";
3627 }
3628
3629 # if a specific +pve version is required for a feature, use $version_guard
3630 # instead of min_version to allow machines to be run with the minimum
3631 # required version
3632 my $required_pve_version = 0;
3633 my $version_guard = sub {
3634 my ($major, $minor, $pve) = @_;
3635 return 0 if !min_version($machine_version, $major, $minor, $pve);
3636 my $max_pve = PVE::QemuServer::Machine::get_pve_version("$major.$minor");
3637 return 1 if min_version($machine_version, $major, $minor, $max_pve+1);
3638 $required_pve_version = $pve if $pve && $pve > $required_pve_version;
3639 return 1;
3640 };
3641
3642 if ($kvm && !defined kvm_version()) {
3643 die "KVM virtualisation configured, but not available. Either disable in VM configuration"
3644 ." or enable in BIOS.\n";
3645 }
3646
3647 my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
3648 my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
3649 my $use_old_bios_files = undef;
3650 ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);
3651
3652 my $cmd = [];
3653 if ($conf->{affinity}) {
3654 push @$cmd, '/usr/bin/taskset', '--cpu-list', '--all-tasks', $conf->{affinity};
3655 }
3656
3657 push @$cmd, $kvm_binary;
3658
3659 push @$cmd, '-id', $vmid;
3660
3661 my $vmname = $conf->{name} || "vm$vmid";
3662
3663 push @$cmd, '-name', "$vmname,debug-threads=on";
3664
3665 push @$cmd, '-no-shutdown';
3666
3667 my $use_virtio = 0;
3668
3669 my $qmpsocket = PVE::QemuServer::Helpers::qmp_socket($vmid);
3670 push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server=on,wait=off";
3671 push @$cmd, '-mon', "chardev=qmp,mode=control";
3672
3673 if (min_version($machine_version, 2, 12)) {
3674 push @$cmd, '-chardev', "socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5";
3675 push @$cmd, '-mon', "chardev=qmp-event,mode=control";
3676 }
3677
3678 push @$cmd, '-pidfile' , PVE::QemuServer::Helpers::pidfile_name($vmid);
3679
3680 push @$cmd, '-daemonize';
3681
3682 if ($conf->{smbios1}) {
3683 my $smbios_conf = parse_smbios1($conf->{smbios1});
3684 if ($smbios_conf->{base64}) {
3685 # Do not pass base64 flag to qemu
3686 delete $smbios_conf->{base64};
3687 my $smbios_string = "";
3688 foreach my $key (keys %$smbios_conf) {
3689 my $value;
3690 if ($key eq "uuid") {
3691 $value = $smbios_conf->{uuid}
3692 } else {
3693 $value = decode_base64($smbios_conf->{$key});
3694 }
3695 # qemu accepts any binary data, only commas need escaping by double comma
3696 $value =~ s/,/,,/g;
3697 $smbios_string .= "," . $key . "=" . $value if $value;
3698 }
3699 push @$cmd, '-smbios', "type=1" . $smbios_string;
3700 } else {
3701 push @$cmd, '-smbios', "type=1,$conf->{smbios1}";
3702 }
3703 }
3704
3705 if ($conf->{bios} && $conf->{bios} eq 'ovmf') {
3706 my ($code_drive_str, $var_drive_str) =
3707 print_ovmf_drive_commandlines($conf, $storecfg, $vmid, $arch, $q35, $version_guard);
3708 push $cmd->@*, '-drive', $code_drive_str;
3709 push $cmd->@*, '-drive', $var_drive_str;
3710 }
3711
3712 if ($q35) { # tell QEMU to load q35 config early
3713 # we use different pcie-port hardware for qemu >= 4.0 for passthrough
3714 if (min_version($machine_version, 4, 0)) {
3715 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35-4.0.cfg';
3716 } else {
3717 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35.cfg';
3718 }
3719 }
3720
3721 if (defined(my $fixups = qemu_created_version_fixups($conf, $forcemachine, $kvmver))) {
3722 push @$cmd, $fixups->@*;
3723 }
3724
3725 if ($conf->{vmgenid}) {
3726 push @$devices, '-device', 'vmgenid,guid='.$conf->{vmgenid};
3727 }
3728
3729 # add usb controllers
3730 my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers(
3731 $conf, $bridges, $arch, $machine_type, $machine_version);
3732 push @$devices, @usbcontrollers if @usbcontrollers;
3733 my $vga = parse_vga($conf->{vga});
3734
3735 my $qxlnum = vga_conf_has_spice($conf->{vga});
3736 $vga->{type} = 'qxl' if $qxlnum;
3737
3738 if (!$vga->{type}) {
3739 if ($arch eq 'aarch64') {
3740 $vga->{type} = 'virtio';
3741 } elsif (min_version($machine_version, 2, 9)) {
3742 $vga->{type} = (!$winversion || $winversion >= 6) ? 'std' : 'cirrus';
3743 } else {
3744 $vga->{type} = ($winversion >= 6) ? 'std' : 'cirrus';
3745 }
3746 }
3747
3748 # enable absolute mouse coordinates (needed by vnc)
3749 my $tablet = $conf->{tablet};
3750 if (!defined($tablet)) {
3751 $tablet = $defaults->{tablet};
3752 $tablet = 0 if $qxlnum; # disable for spice because it is not needed
3753 $tablet = 0 if $vga->{type} =~ m/^serial\d+$/; # disable if we use serial terminal (no vga card)
3754 }
3755
3756 if ($tablet) {
3757 push @$devices, '-device', print_tabletdevice_full($conf, $arch) if $tablet;
3758 my $kbd = print_keyboarddevice_full($conf, $arch);
3759 push @$devices, '-device', $kbd if defined($kbd);
3760 }
3761
3762 my $bootorder = device_bootorder($conf);
3763
3764 # host pci device passthrough
3765 my ($kvm_off, $gpu_passthrough, $legacy_igd, $pci_devices) = PVE::QemuServer::PCI::print_hostpci_devices(
3766 $vmid, $conf, $devices, $vga, $winversion, $bridges, $arch, $machine_type, $bootorder);
3767
3768 # usb devices
3769 my $usb_dev_features = {};
3770 $usb_dev_features->{spice_usb3} = 1 if min_version($machine_version, 4, 0);
3771
3772 my @usbdevices = PVE::QemuServer::USB::get_usb_devices(
3773 $conf, $usb_dev_features, $bootorder, $machine_version);
3774 push @$devices, @usbdevices if @usbdevices;
3775
3776 # serial devices
3777 for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
3778 my $path = $conf->{"serial$i"} or next;
3779 if ($path eq 'socket') {
3780 my $socket = "/var/run/qemu-server/${vmid}.serial$i";
3781 push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server=on,wait=off";
3782 # On aarch64, serial0 is the UART device. QEMU only allows
3783 # connecting UART devices via the '-serial' command line, as
3784 # the device has a fixed slot on the hardware...
3785 if ($arch eq 'aarch64' && $i == 0) {
3786 push @$devices, '-serial', "chardev:serial$i";
3787 } else {
3788 push @$devices, '-device', "isa-serial,chardev=serial$i";
3789 }
3790 } else {
3791 die "no such serial device\n" if ! -c $path;
3792 push @$devices, '-chardev', "serial,id=serial$i,path=$path";
3793 push @$devices, '-device', "isa-serial,chardev=serial$i";
3794 }
3795 }
3796
3797 # parallel devices
3798 for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
3799 if (my $path = $conf->{"parallel$i"}) {
3800 die "no such parallel device\n" if ! -c $path;
3801 my $devtype = $path =~ m!^/dev/usb/lp! ? 'serial' : 'parallel';
3802 push @$devices, '-chardev', "$devtype,id=parallel$i,path=$path";
3803 push @$devices, '-device', "isa-parallel,chardev=parallel$i";
3804 }
3805 }
3806
3807 if (min_version($machine_version, 4, 0) && (my $audio = conf_has_audio($conf))) {
3808 my $audiopciaddr = print_pci_addr("audio0", $bridges, $arch, $machine_type);
3809 my $audio_devs = audio_devs($audio, $audiopciaddr, $machine_version);
3810 push @$devices, @$audio_devs;
3811 }
3812
3813 add_tpm_device($vmid, $devices, $conf);
3814
3815 my $sockets = 1;
3816 $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused
3817 $sockets = $conf->{sockets} if $conf->{sockets};
3818
3819 my $cores = $conf->{cores} || 1;
3820
3821 my $maxcpus = $sockets * $cores;
3822
3823 my $vcpus = $conf->{vcpus} ? $conf->{vcpus} : $maxcpus;
3824
3825 my $allowed_vcpus = $cpuinfo->{cpus};
3826
3827 die "MAX $allowed_vcpus vcpus allowed per VM on this node\n" if ($allowed_vcpus < $maxcpus);
3828
3829 if ($hotplug_features->{cpu} && min_version($machine_version, 2, 7)) {
3830 push @$cmd, '-smp', "1,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3831 for (my $i = 2; $i <= $vcpus; $i++) {
3832 my $cpustr = print_cpu_device($conf,$i);
3833 push @$cmd, '-device', $cpustr;
3834 }
3835
3836 } else {
3837
3838 push @$cmd, '-smp', "$vcpus,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3839 }
3840 push @$cmd, '-nodefaults';
3841
3842 push @$cmd, '-boot', "menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg";
3843
3844 push $machineFlags->@*, 'acpi=off' if defined($conf->{acpi}) && $conf->{acpi} == 0;
3845
3846 push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;
3847
3848 if ($vga->{type} && $vga->{type} !~ m/^serial\d+$/ && $vga->{type} ne 'none'){
3849 push @$devices, '-device', print_vga_device(
3850 $conf, $vga, $arch, $machine_version, $machine_type, undef, $qxlnum, $bridges);
3851
3852 push @$cmd, '-display', 'egl-headless,gl=core' if $vga->{type} eq 'virtio-gl'; # VIRGL
3853
3854 my $socket = PVE::QemuServer::Helpers::vnc_socket($vmid);
3855 push @$cmd, '-vnc', "unix:$socket,password=on";
3856 } else {
3857 push @$cmd, '-vga', 'none' if $vga->{type} eq 'none';
3858 push @$cmd, '-nographic';
3859 }
3860
3861 # time drift fix
3862 my $tdf = defined($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};
3863 my $useLocaltime = $conf->{localtime};
3864
3865 if ($winversion >= 5) { # windows
3866 $useLocaltime = 1 if !defined($conf->{localtime});
3867
3868 # use time drift fix when acpi is enabled
3869 if (!(defined($conf->{acpi}) && $conf->{acpi} == 0)) {
3870 $tdf = 1 if !defined($conf->{tdf});
3871 }
3872 }
3873
3874 if ($winversion >= 6) {
3875 push @$globalFlags, 'kvm-pit.lost_tick_policy=discard';
3876 push @$machineFlags, 'hpet=off';
3877 }
3878
3879 push @$rtcFlags, 'driftfix=slew' if $tdf;
3880
3881 if ($conf->{startdate} && $conf->{startdate} ne 'now') {
3882 push @$rtcFlags, "base=$conf->{startdate}";
3883 } elsif ($useLocaltime) {
3884 push @$rtcFlags, 'base=localtime';
3885 }
3886
3887 if ($forcecpu) {
3888 push @$cmd, '-cpu', $forcecpu;
3889 } else {
3890 push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
3891 }
3892
3893 PVE::QemuServer::Memory::config(
3894 $conf, $vmid, $sockets, $cores, $defaults, $hotplug_features->{memory}, $cmd);
3895
3896 push @$cmd, '-S' if $conf->{freeze};
3897
3898 push @$cmd, '-k', $conf->{keyboard} if defined($conf->{keyboard});
3899
3900 my $guest_agent = parse_guest_agent($conf);
3901
3902 if ($guest_agent->{enabled}) {
3903 my $qgasocket = PVE::QemuServer::Helpers::qmp_socket($vmid, 1);
3904 push @$devices, '-chardev', "socket,path=$qgasocket,server=on,wait=off,id=qga0";
3905
3906 if (!$guest_agent->{type} || $guest_agent->{type} eq 'virtio') {
3907 my $pciaddr = print_pci_addr("qga0", $bridges, $arch, $machine_type);
3908 push @$devices, '-device', "virtio-serial,id=qga0$pciaddr";
3909 push @$devices, '-device', 'virtserialport,chardev=qga0,name=org.qemu.guest_agent.0';
3910 } elsif ($guest_agent->{type} eq 'isa') {
3911 push @$devices, '-device', "isa-serial,chardev=qga0";
3912 }
3913 }
3914
3915 my $rng = $conf->{rng0} ? parse_rng($conf->{rng0}) : undef;
3916 if ($rng && $version_guard->(4, 1, 2)) {
3917 check_rng_source($rng->{source});
3918
3919 my $max_bytes = $rng->{max_bytes} // $rng_fmt->{max_bytes}->{default};
3920 my $period = $rng->{period} // $rng_fmt->{period}->{default};
3921 my $limiter_str = "";
3922 if ($max_bytes) {
3923 $limiter_str = ",max-bytes=$max_bytes,period=$period";
3924 }
3925
3926 my $rng_addr = print_pci_addr("rng0", $bridges, $arch, $machine_type);
3927 push @$devices, '-object', "rng-random,filename=$rng->{source},id=rng0";
3928 push @$devices, '-device', "virtio-rng-pci,rng=rng0$limiter_str$rng_addr";
3929 }
3930
3931 my $spice_port;
3932
3933 if ($qxlnum || $vga->{type} =~ /^virtio/) {
3934 if ($qxlnum > 1) {
3935 if ($winversion){
3936 for (my $i = 1; $i < $qxlnum; $i++){
3937 push @$devices, '-device', print_vga_device(
3938 $conf, $vga, $arch, $machine_version, $machine_type, $i, $qxlnum, $bridges);
3939 }
3940 } else {
3941 # assume other OS works like Linux
3942 my ($ram, $vram) = ("134217728", "67108864");
3943 if ($vga->{memory}) {
3944 $ram = PVE::Tools::convert_size($qxlnum*4*$vga->{memory}, 'mb' => 'b');
3945 $vram = PVE::Tools::convert_size($qxlnum*2*$vga->{memory}, 'mb' => 'b');
3946 }
3947 push @$cmd, '-global', "qxl-vga.ram_size=$ram";
3948 push @$cmd, '-global', "qxl-vga.vram_size=$vram";
3949 }
3950 }
3951
3952 my $pciaddr = print_pci_addr("spice", $bridges, $arch, $machine_type);
3953
3954 my $pfamily = PVE::Tools::get_host_address_family($nodename);
3955 my @nodeaddrs = PVE::Tools::getaddrinfo_all('localhost', family => $pfamily);
3956 die "failed to get an ip address of type $pfamily for 'localhost'\n" if !@nodeaddrs;
3957
3958 push @$devices, '-device', "virtio-serial,id=spice$pciaddr";
3959 push @$devices, '-chardev', "spicevmc,id=vdagent,name=vdagent";
3960 push @$devices, '-device', "virtserialport,chardev=vdagent,name=com.redhat.spice.0";
3961
3962 my $localhost = PVE::Network::addr_to_ip($nodeaddrs[0]->{addr});
3963 $spice_port = PVE::Tools::next_spice_port($pfamily, $localhost);
3964
3965 my $spice_enhancement_str = $conf->{spice_enhancements} // '';
3966 my $spice_enhancement = parse_property_string($spice_enhancements_fmt, $spice_enhancement_str);
3967 if ($spice_enhancement->{foldersharing}) {
3968 push @$devices, '-chardev', "spiceport,id=foldershare,name=org.spice-space.webdav.0";
3969 push @$devices, '-device', "virtserialport,chardev=foldershare,name=org.spice-space.webdav.0";
3970 }
3971
3972 my $spice_opts = "tls-port=${spice_port},addr=$localhost,tls-ciphers=HIGH,seamless-migration=on";
3973 $spice_opts .= ",streaming-video=$spice_enhancement->{videostreaming}"
3974 if $spice_enhancement->{videostreaming};
3975
3976 push @$devices, '-spice', "$spice_opts";
3977 }
3978
3979 # enable balloon by default, unless explicitly disabled
3980 if (!defined($conf->{balloon}) || $conf->{balloon}) {
3981 my $pciaddr = print_pci_addr("balloon0", $bridges, $arch, $machine_type);
3982 my $ballooncmd = "virtio-balloon-pci,id=balloon0$pciaddr";
3983 $ballooncmd .= ",free-page-reporting=on" if min_version($machine_version, 6, 2);
3984 push @$devices, '-device', $ballooncmd;
3985 }
3986
3987 if ($conf->{watchdog}) {
3988 my $wdopts = parse_watchdog($conf->{watchdog});
3989 my $pciaddr = print_pci_addr("watchdog", $bridges, $arch, $machine_type);
3990 my $watchdog = $wdopts->{model} || 'i6300esb';
3991 push @$devices, '-device', "$watchdog$pciaddr";
3992 push @$devices, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
3993 }
3994
3995 my $vollist = [];
3996 my $scsicontroller = {};
3997 my $ahcicontroller = {};
3998 my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : $defaults->{scsihw};
3999
4000 # Add iscsi initiator name if available
4001 if (my $initiator = get_initiator_name()) {
4002 push @$devices, '-iscsi', "initiator-name=$initiator";
4003 }
4004
4005 PVE::QemuConfig->foreach_volume($conf, sub {
4006 my ($ds, $drive) = @_;
4007
4008 if (PVE::Storage::parse_volume_id($drive->{file}, 1)) {
4009 check_volume_storage_type($storecfg, $drive->{file});
4010 push @$vollist, $drive->{file};
4011 }
4012
4013 # ignore efidisk here, already added in bios/fw handling code above
4014 return if $drive->{interface} eq 'efidisk';
4015 # similar for TPM
4016 return if $drive->{interface} eq 'tpmstate';
4017
4018 $use_virtio = 1 if $ds =~ m/^virtio/;
4019
4020 $drive->{bootindex} = $bootorder->{$ds} if $bootorder->{$ds};
4021
4022 if ($drive->{interface} eq 'virtio'){
4023 push @$cmd, '-object', "iothread,id=iothread-$ds" if $drive->{iothread};
4024 }
4025
4026 if ($drive->{interface} eq 'scsi') {
4027
4028 my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
4029
4030 die "scsi$drive->{index}: machine version 4.1~pve2 or higher is required to use more than 14 SCSI disks\n"
4031 if $drive->{index} > 13 && !&$version_guard(4, 1, 2);
4032
4033 my $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges, $arch, $machine_type);
4034 my $scsihw_type = $scsihw =~ m/^virtio-scsi-single/ ? "virtio-scsi-pci" : $scsihw;
4035
4036 my $iothread = '';
4037 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{iothread}){
4038 $iothread .= ",iothread=iothread-$controller_prefix$controller";
4039 push @$cmd, '-object', "iothread,id=iothread-$controller_prefix$controller";
4040 } elsif ($drive->{iothread}) {
4041 log_warn(
4042 "iothread is only valid with virtio disk or virtio-scsi-single controller, ignoring\n"
4043 );
4044 }
4045
4046 my $queues = '';
4047 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{queues}){
4048 $queues = ",num_queues=$drive->{queues}";
4049 }
4050
4051 push @$devices, '-device', "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues"
4052 if !$scsicontroller->{$controller};
4053 $scsicontroller->{$controller}=1;
4054 }
4055
4056 if ($drive->{interface} eq 'sata') {
4057 my $controller = int($drive->{index} / $PVE::QemuServer::Drive::MAX_SATA_DISKS);
4058 my $pciaddr = print_pci_addr("ahci$controller", $bridges, $arch, $machine_type);
4059 push @$devices, '-device', "ahci,id=ahci$controller,multifunction=on$pciaddr"
4060 if !$ahcicontroller->{$controller};
4061 $ahcicontroller->{$controller}=1;
4062 }
4063
4064 my $pbs_conf = $pbs_backing->{$ds};
4065 my $pbs_name = undef;
4066 if ($pbs_conf) {
4067 $pbs_name = "drive-$ds-pbs";
4068 push @$devices, '-blockdev', print_pbs_blockdev($pbs_conf, $pbs_name);
4069 }
4070
4071 my $drive_cmd = print_drive_commandline_full(
4072 $storecfg, $vmid, $drive, $pbs_name, min_version($kvmver, 6, 0));
4073
4074 # extra protection for templates, but SATA and IDE don't support it..
4075 $drive_cmd .= ',readonly=on' if drive_is_read_only($conf, $drive);
4076
4077 push @$devices, '-drive',$drive_cmd;
4078 push @$devices, '-device', print_drivedevice_full(
4079 $storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type);
4080 });
4081
4082 for (my $i = 0; $i < $MAX_NETS; $i++) {
4083 my $netname = "net$i";
4084
4085 next if !$conf->{$netname};
4086 my $d = parse_net($conf->{$netname});
4087 next if !$d;
4088 # save the MAC addr here (could be auto-gen. in some odd setups) for FDB registering later?
4089
4090 $use_virtio = 1 if $d->{model} eq 'virtio';
4091
4092 $d->{bootindex} = $bootorder->{$netname} if $bootorder->{$netname};
4093
4094 my $netdevfull = print_netdev_full($vmid, $conf, $arch, $d, $netname);
4095 push @$devices, '-netdev', $netdevfull;
4096
4097 my $netdevicefull = print_netdevice_full(
4098 $vmid, $conf, $d, $netname, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version);
4099
4100 push @$devices, '-device', $netdevicefull;
4101 }
4102
4103 if ($conf->{ivshmem}) {
4104 my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
4105
4106 my $bus;
4107 if ($q35) {
4108 $bus = print_pcie_addr("ivshmem");
4109 } else {
4110 $bus = print_pci_addr("ivshmem", $bridges, $arch, $machine_type);
4111 }
4112
4113 my $ivshmem_name = $ivshmem->{name} // $vmid;
4114 my $path = '/dev/shm/pve-shm-' . $ivshmem_name;
4115
4116 push @$devices, '-device', "ivshmem-plain,memdev=ivshmem$bus,";
4117 push @$devices, '-object', "memory-backend-file,id=ivshmem,share=on,mem-path=$path"
4118 .",size=$ivshmem->{size}M";
4119 }
4120
4121 # pci.4 is nested in pci.1
4122 $bridges->{1} = 1 if $bridges->{4};
4123
4124 if (!$q35) { # add pci bridges
4125 if (min_version($machine_version, 2, 3)) {
4126 $bridges->{1} = 1;
4127 $bridges->{2} = 1;
4128 }
4129 $bridges->{3} = 1 if $scsihw =~ m/^virtio-scsi-single/;
4130 }
4131
4132 for my $k (sort {$b cmp $a} keys %$bridges) {
4133 next if $q35 && $k < 4; # q35.cfg already includes bridges up to 3
4134
4135 my $k_name = $k;
4136 if ($k == 2 && $legacy_igd) {
4137 $k_name = "$k-igd";
4138 }
4139 my $pciaddr = print_pci_addr("pci.$k_name", undef, $arch, $machine_type);
4140 my $devstr = "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr";
4141
4142 if ($q35) { # add after -readconfig pve-q35.cfg
4143 splice @$devices, 2, 0, '-device', $devstr;
4144 } else {
4145 unshift @$devices, '-device', $devstr if $k > 0;
4146 }
4147 }
4148
4149 if (!$kvm) {
4150 push @$machineFlags, 'accel=tcg';
4151 }
4152
4153 push @$machineFlags, 'smm=off' if should_disable_smm($conf, $vga, $machine_type);
4154
4155 my $machine_type_min = $machine_type;
4156 if ($add_pve_version) {
4157 $machine_type_min =~ s/\+pve\d+$//;
4158 $machine_type_min .= "+pve$required_pve_version";
4159 }
4160 push @$machineFlags, "type=${machine_type_min}";
4161
4162 push @$cmd, @$devices;
4163 push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
4164 push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
4165 push @$cmd, '-global', join(',', @$globalFlags) if scalar(@$globalFlags);
4166
4167 if (my $vmstate = $conf->{vmstate}) {
4168 my $statepath = PVE::Storage::path($storecfg, $vmstate);
4169 push @$vollist, $vmstate;
4170 push @$cmd, '-loadstate', $statepath;
4171 print "activating and using '$vmstate' as vmstate\n";
4172 }
4173
4174 if (PVE::QemuConfig->is_template($conf)) {
4175 # needed to workaround base volumes being read-only
4176 push @$cmd, '-snapshot';
4177 }
4178
4179 # add custom args
4180 if ($conf->{args}) {
4181 my $aa = PVE::Tools::split_args($conf->{args});
4182 push @$cmd, @$aa;
4183 }
4184
4185 return wantarray ? ($cmd, $vollist, $spice_port, $pci_devices) : $cmd;
4186 }
4187
# Verify that the host-side entropy source for a VirtIO RNG device is usable.
#
# $source - path of the source on the host
#
# Dies if $source does not exist, or if $source is '/dev/hwrng' while the first line of the
# kernel's 'rng_current' sysfs attribute reads 'none'.
sub check_rng_source {
    my ($source) = @_;

    # mainly matters for /dev/hwrng, but checking the other sources is harmless
    if (!-e $source) {
        die "cannot create VirtIO RNG device: source file '$source' doesn't exist\n";
    }

    my $rng_current = '/sys/devices/virtual/misc/hw_random/rng_current';
    my $is_hwrng = $source eq '/dev/hwrng';

    if ($is_hwrng && file_read_firstline($rng_current) eq 'none') {
        # Needs to abort, otherwise QEMU crashes on first rng access. Note that rng_current
        # cannot be changed to 'none' manually, so once the VM is past this point, it's no
        # longer an issue.
        my $reason = "Cannot start VM with passed-through RNG device: '/dev/hwrng' exists, but"
            ." '$rng_current' is set to 'none'. Ensure that a compatible hardware-RNG is attached"
            ." to the host.\n";
        die $reason;
    }
}
4204
# Ask the running VM for its SPICE port via the 'query-spice' monitor command.
#
# Returns the 'tls-port' value if it is set (true), otherwise the 'port' value.
# Dies with "no spice port" if neither is set.
sub spice_port {
    my ($vmid) = @_;

    my $spice_info = mon_cmd($vmid, 'query-spice');

    my $port = $spice_info->{'tls-port'} || $spice_info->{'port'};
    die "no spice port\n" if !$port;

    return $port;
}
4212
# Collect the IDs of devices currently present in the running VM.
#
# Queries the monitor (query-pci, query-block, query-mice, qom-list) and returns a hash
# reference keyed by device ID:
#  - PCI devices that have a 'qdev_id' (bridges additionally get the number of their direct
#    child devices added to the value),
#  - block devices named 'drive-<id>' (keyed by <id>),
#  - 'tablet' if a mouse named 'QEMU HID Tablet' is reported,
#  - peripherals named 'usb<N>' or 'usbredirdev<N>'.
sub vm_devices_list {
    my ($vmid) = @_;

    my $present = {};

    # walk the PCI topology with a FIFO queue, starting with the devices of every root bus
    my $pci_buses = mon_cmd($vmid, 'query-pci');
    my @pending;
    for my $bus (@$pci_buses) {
        push @pending, @{$bus->{devices}};
    }

    while (@pending) {
        my $dev = shift @pending;

        my $id = $dev->{qdev_id};
        $present->{$id} = 1 if $id;

        my $bridge = $dev->{pci_bridge};
        next if !$bridge || !$bridge->{devices};

        # descend into the bridge and account for its direct children
        my $children = $bridge->{devices};
        $present->{$id} += scalar(@$children);
        push @pending, @$children;
    }

    my $block_devices = mon_cmd($vmid, 'query-block');
    for my $blockdev (@$block_devices) {
        $present->{$1} = 1 if $blockdev->{device} =~ m/^drive-(\S+)/;
    }

    my $mice = mon_cmd($vmid, 'query-mice');
    for my $mouse (@$mice) {
        next if $mouse->{name} ne 'QEMU HID Tablet';
        $present->{tablet} = 1;
        last;
    }

    # there is no query-usb for usb devices, but they show up as entries of
    # qom-list path=/machine/peripheral
    my $peripherals = mon_cmd($vmid, 'qom-list', path => '/machine/peripheral');
    for my $peripheral (@$peripherals) {
        my $name = $peripheral->{name};
        $present->{$name} = 1 if $name =~ m/^usb(?:redirdev)?\d+$/;
    }

    return $present;
}
4262
# Hot-plug a single device into a running VM.
#
# $storecfg     - storage configuration, passed on to the drive helpers
# $conf         - VM configuration hash
# $vmid         - ID of the VM
# $deviceid     - ID of the device to plug ('tablet', 'keyboard', 'usbN', 'usbredirdevN',
#                 'virtioN', 'scsiN', 'virtioscsiN', 'scsihwN', 'netN' or 'pci.N')
# $device       - parsed device/drive/net description; not used for tablet, keyboard,
#                 usbredirdev and PCI bridges
# $arch         - guest architecture, passed on to the device string helpers
# $machine_type - machine type, passed on to the device string helpers
#
# Returns 1 on success, including when the device is already present. For net devices it
# returns nothing if qemu_netdevadd() reports failure. Dies on any other error and for
# device IDs that cannot be hot-plugged.
#
# If adding the guest-visible device fails after its backend (drive or netdev) was already
# created, the backend is removed again before the error is re-thrown.
sub vm_deviceplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    my $devices_list = vm_devices_list($vmid);
    return 1 if defined($devices_list->{$deviceid});

    # add PCI bridge if we need it for the device
    qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid, $arch, $machine_type);

    if ($deviceid eq 'tablet') {
        qemu_deviceadd($vmid, print_tabletdevice_full($conf, $arch));
    } elsif ($deviceid eq 'keyboard') {
        qemu_deviceadd($vmid, print_keyboarddevice_full($conf, $arch));
    } elsif ($deviceid =~ m/^usbredirdev(\d+)$/) {
        my $id = $1;
        qemu_spice_usbredir_chardev_add($vmid, "usbredirchardev$id");
        qemu_deviceadd($vmid, PVE::QemuServer::USB::print_spice_usbdevice($id, "xhci", $id + 1));
    } elsif ($deviceid =~ m/^usb(\d+)$/) {
        qemu_deviceadd($vmid, PVE::QemuServer::USB::print_usbdevice_full($conf, $deviceid, $device, {}, $1 + 1));
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        qemu_iothread_add($vmid, $deviceid, $device);

        qemu_driveadd($storecfg, $vmid, $device);
        my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);

        # keep device_add inside the eval so that a failure there also removes the drive
        # again, matching the handling of SCSI disks below
        eval {
            qemu_deviceadd($vmid, $devicefull);
            qemu_deviceaddverify($vmid, $deviceid);
        };
        if (my $err = $@) {
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : "lsi";
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $scsihw_type = $scsihw eq 'virtio-scsi-single' ? "virtio-scsi-pci" : $scsihw;

        my $devicefull = "$scsihw_type,id=$deviceid$pciaddr";

        # iothread and queue count only apply to per-disk virtio-scsi controllers
        if ($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{iothread}) {
            qemu_iothread_add($vmid, $deviceid, $device);
            $devicefull .= ",iothread=iothread-$deviceid";
        }

        if ($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{queues}) {
            $devicefull .= ",num_queues=$device->{queues}";
        }

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        qemu_findorcreatescsihw($storecfg, $conf, $vmid, $device, $arch, $machine_type);
        qemu_driveadd($storecfg, $vmid, $device);

        my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);
        eval { qemu_deviceadd($vmid, $devicefull); };
        if (my $err = $@) {
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        return if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid);

        # the NIC device string is built against the machine type reported by
        # qemu_machine_pxe(), not the one passed in by the caller
        my $net_machine_type = PVE::QemuServer::Machine::qemu_machine_pxe($vmid, $conf);
        my $machine_version = PVE::QemuServer::Machine::extract_version($net_machine_type);
        my $use_old_bios_files = undef;
        ($use_old_bios_files, $net_machine_type) = qemu_use_old_bios_files($net_machine_type);

        my $netdevicefull = print_netdevice_full(
            $vmid, $conf, $device, $deviceid, undef, $use_old_bios_files, $arch, $net_machine_type, $machine_version);

        # keep device_add inside the eval so that a failure there also removes the netdev
        eval {
            qemu_deviceadd($vmid, $netdevicefull);
            qemu_deviceaddverify($vmid, $deviceid);
            qemu_set_link_status($vmid, $deviceid, !$device->{link_down});
        };
        if (my $err = $@) {
            eval { qemu_netdevdel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif (!$q35 && $deviceid =~ m/^(pci\.)(\d+)$/) {
        my $bridgeid = $2;
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $devicefull = "pci-bridge,id=pci.$bridgeid,chassis_nr=$bridgeid$pciaddr";

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);
    } else {
        die "can't hotplug device '$deviceid'\n";
    }

    return 1;
}
4359
# fixme: this should raise exceptions on error!
#
# Hot-unplug a single device from a running VM.
#
# Returns 1 when the device is gone, including when it was not present to begin with.
# Dies if the device is one of the configured boot disks or cannot be hot-unplugged.
sub vm_deviceunplug {
    my ($vmid, $conf, $deviceid) = @_;

    my $plugged = vm_devices_list($vmid);
    return 1 if !defined($plugged->{$deviceid});

    my $bootdisks = PVE::QemuServer::Drive::get_bootdisks($conf);
    for my $bootdisk (@$bootdisks) {
        die "can't unplug bootdisk '$deviceid'\n" if $bootdisk eq $deviceid;
    }

    # devices that are deleted without waiting for the removal to complete
    my %delete_only = map { $_ => 1 } qw(tablet keyboard xhci);

    if ($delete_only{$deviceid}) {
        qemu_devicedel($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(?:usbredirdev|usb|virtioscsi|scsihw)\d+$/) {
        # plain devices without a backend that needs separate cleanup
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^virtio\d+$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_iothread_del($vmid, $deviceid, $drive);
    } elsif ($deviceid =~ m/^scsi\d+$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_deletescsihw($conf, $vmid, $deviceid);

        # with virtio-scsi-single the iothread is named after the per-disk controller
        my $single_controller = $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
        if ($single_controller) {
            qemu_iothread_del($vmid, "virtioscsi$drive->{index}", $drive);
        }
    } elsif ($deviceid =~ m/^net\d+$/) {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_netdevdel($vmid, $deviceid);
    } else {
        die "can't unplug device '$deviceid'\n";
    }

    return 1;
}
4408
# Add a 'spicevmc' character device of type 'usbredir' with the given ID to the running VM
# via the 'chardev-add' monitor command.
sub qemu_spice_usbredir_chardev_add {
    my ($vmid, $id) = @_;

    my $backend = {
        type => 'spicevmc',
        data => { type => "usbredir" },
    };

    mon_cmd($vmid, "chardev-add", id => $id, backend => $backend);
}
4422
# Hot-add a device described by a QEMU '-device' style string ("<driver>,key=value,...").
#
# The string is prefixed with "driver=" and split on '=' and ',' into the key/value
# arguments of the 'device_add' monitor command, so neither keys nor values may contain
# those characters.
sub qemu_deviceadd {
    my ($vmid, $devicefull) = @_;

    my %options = split(/[=,]/, "driver=" . $devicefull);

    mon_cmd($vmid, "device_add", %options);
}
4431
# Issue the 'device_del' monitor command for the given device ID and return its result.
sub qemu_devicedel {
    my ($vmid, $deviceid) = @_;

    my $result = mon_cmd($vmid, "device_del", id => $deviceid);

    return $result;
}
4437
# Create the iothread object "iothread-<deviceid>" in the running VM, but only if the
# device has 'iothread' set and the object is not already listed.
sub qemu_iothread_add {
    my ($vmid, $deviceid, $device) = @_;

    if ($device->{iothread}) {
        my $object_id = "iothread-$deviceid";
        my $existing = vm_iothreads_list($vmid);
        if (!$existing->{$object_id}) {
            qemu_objectadd($vmid, $object_id, "iothread");
        }
    }
}
4446
# Remove the iothread object "iothread-<deviceid>" from the running VM, but only if the
# device has 'iothread' set and the object is currently listed.
sub qemu_iothread_del {
    my ($vmid, $deviceid, $device) = @_;

    if ($device->{iothread}) {
        my $object_id = "iothread-$deviceid";
        my $existing = vm_iothreads_list($vmid);
        if ($existing->{$object_id}) {
            qemu_objectdel($vmid, $object_id);
        }
    }
}
4455
# Create a QOM object of type $qomtype with ID $objectid via the 'object-add' monitor
# command. Returns 1.
sub qemu_objectadd {
    my ($vmid, $objectid, $qomtype) = @_;

    my @params = (id => $objectid, "qom-type" => $qomtype);
    mon_cmd($vmid, "object-add", @params);

    return 1;
}
4463
# Delete the QOM object with ID $objectid via the 'object-del' monitor command. Returns 1.
sub qemu_objectdel {
    my ($vmid, $objectid) = @_;

    my @params = (id => $objectid);
    mon_cmd($vmid, "object-del", @params);

    return 1;
}
4471
# Add the backing drive for $device to the running VM using the HMP 'drive_add' command.
#
# The drive string is built by print_drive_commandline_full(); its last argument is whether
# the running QEMU is at least version 6.0. Returns 1 if the monitor output contains "OK",
# dies with the monitor output otherwise.
sub qemu_driveadd {
    my ($storecfg, $vmid, $device) = @_;

    my $running_version = get_running_qemu_version($vmid);
    my $qemu_6_or_newer = min_version($running_version, 6, 0);

    my $drive_opts = print_drive_commandline_full($storecfg, $vmid, $device, undef, $qemu_6_or_newer);

    # double every backslash, as the string is embedded in a quoted HMP argument
    (my $escaped = $drive_opts) =~ s/\\/\\\\/g;

    my $reply = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_add auto \"$escaped\"");

    # If the command succeeds qemu prints: "OK"
    die "adding drive failed: $reply\n" if $reply !~ m/OK/s;

    return 1;
}
4486
# Remove the drive "drive-<deviceid>" from the running VM using the HMP 'drive_del' command.
#
# Returns 1 if the monitor prints nothing (ignoring leading whitespace) or reports the
# device as not found; dies with the monitor output otherwise.
sub qemu_drivedel {
    my ($vmid, $deviceid) = @_;

    my $reply = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_del drive-$deviceid");
    $reply =~ s/^\s+//;

    # NB: device not found errors mean the drive was auto-deleted and we ignore the error
    my $already_gone = $reply =~ m/Device \'.*?\' not found/s;

    if ($reply eq "" || $already_gone) {
        return 1;
    }

    die "deleting drive $deviceid failed : $reply\n";
}
4500
# Wait until $deviceid shows up in the VM's device list.
#
# Polls vm_devices_list() up to six times, sleeping one second after each unsuccessful
# check. Returns 1 once the device is listed, dies if it never appears.
sub qemu_deviceaddverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $plugged = vm_devices_list($vmid);
        if (defined($plugged->{$deviceid})) {
            return 1;
        }
        sleep 1;
    }

    die "error on hotplug device '$deviceid'\n";
}
4512
4513
# Wait until $deviceid has disappeared from the VM's device list.
#
# device_del is asynchronous and its empty return value is not reliable, so the removal is
# confirmed by polling vm_devices_list() up to six times, sleeping one second after each
# unsuccessful check. Returns 1 once the device is gone, dies if it is still listed.
sub qemu_devicedelverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $plugged = vm_devices_list($vmid);
        if (!defined($plugged->{$deviceid})) {
            return 1;
        }
        sleep 1;
    }

    die "error on hot-unplugging device '$deviceid'\n";
}
4528
# Make sure the SCSI controller that $device belongs to exists in the running VM.
#
# The controller ID is the prefix and controller number reported by scsihw_infos(); the
# controller is hot-plugged if it is not in the VM's device list yet. Returns 1.
sub qemu_findorcreatescsihw {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my (undef, $controller, $controller_prefix) = scsihw_infos($conf, $device);
    my $controller_id = $controller_prefix . $controller;

    my $plugged = vm_devices_list($vmid);
    vm_deviceplug($storecfg, $conf, $vmid, $controller_id, $device, $arch, $machine_type)
        if !defined($plugged->{$controller_id});

    return 1;
}
4543
# Remove the SCSI controller belonging to drive $opt if it is no longer needed.
#
# With 'virtio-scsi-single' the per-disk controller "virtioscsi<index>" is always
# unplugged. Otherwise the controller "scsihw<N>" is kept if any device still listed for
# the VM is a SCSI drive whose index is below (maxdev - 1) * (controller + 1), and
# unplugged if there is none. Returns 1 in all cases.
sub qemu_deletescsihw {
    my ($conf, $vmid, $opt) = @_;

    my $device = parse_drive($opt, $conf->{$opt});

    my $single_controller = $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
    if ($single_controller) {
        vm_deviceunplug($vmid, $conf, "virtioscsi$device->{index}");
        return 1;
    }

    my ($maxdev, $controller) = scsihw_infos($conf, $device);
    my $index_limit = ($maxdev - 1) * ($controller + 1);

    my $plugged = vm_devices_list($vmid);
    for my $name (keys %$plugged) {
        next if !is_valid_drivename($name);

        my $drive = parse_drive($name, $conf->{$name});
        # another SCSI drive is still plugged, keep the controller
        return 1 if $drive->{interface} eq 'scsi' && $drive->{index} < $index_limit;
    }

    vm_deviceunplug($vmid, $conf, "scsihw$controller");

    return 1;
}
4572
# Hot-plug the PCI bridge that $device is attached to, if it is not present yet.
#
# $storecfg, $conf, $vmid - passed on to vm_deviceplug()
# $device                 - ID of the device whose PCI address is looked up
# $arch, $machine_type    - passed on to print_pci_addr() and vm_deviceplug()
#
# print_pci_addr() records the bridge it uses in the hash handed to it. Nothing is done if
# no bridge was recorded or its number is below 1. Returns 1.
sub qemu_add_pci_bridge {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my $bridges = {};

    # only called for its side effect of filling in $bridges
    print_pci_addr($device, $bridges, $arch, $machine_type);

    # same key the previous each() loop ended up with: the last one in iteration order
    my $bridgeid = (keys %$bridges)[-1];
    return 1 if !defined($bridgeid) || $bridgeid < 1;

    my $bridge = "pci.$bridgeid";
    my $devices_list = vm_devices_list($vmid);

    if (!defined($devices_list->{$bridge})) {
        # vm_deviceplug() expects a device description between the device ID and $arch. A
        # bridge has none, so pass undef explicitly - leaving it out shifts $arch and
        # $machine_type into the wrong parameters.
        vm_deviceplug($storecfg, $conf, $vmid, $bridge, undef, $arch, $machine_type);
    }

    return 1;
}
4596
# Set the link state of network device $device via the 'set_link' monitor command.
# A true $up is sent as JSON true, anything else as JSON false.
sub qemu_set_link_status {
    my ($vmid, $device, $up) = @_;

    my $link_state = $up ? JSON::true : JSON::false;

    mon_cmd($vmid, "set_link", name => $device, up => $link_state);
}
4603
# Hot-add the network backend for $deviceid via the 'netdev_add' monitor command.
#
# The netdev string from print_netdev_full() is split on '=' and ',' into key/value
# arguments. 'vhost' is converted to a JSON boolean and 'queues' to a number before the
# command is sent. Returns 1.
sub qemu_netdevadd {
    my ($vmid, $conf, $arch, $device, $deviceid) = @_;

    my $netdev = print_netdev_full($vmid, $conf, $arch, $device, $deviceid, 1);
    my %options = split(/[=,]/, $netdev);

    # the split above yields plain strings, convert the typed options
    $options{vhost} = JSON::boolean(PVE::JSONSchema::parse_boolean($options{vhost}))
        if defined($options{vhost});

    $options{queues} = $options{queues} + 0
        if defined($options{queues});

    mon_cmd($vmid, "netdev_add", %options);

    return 1;
}
4621
# Remove the network backend with ID $deviceid via the 'netdev_del' monitor command.
sub qemu_netdevdel {
    my ($vmid, $deviceid) = @_;

    my @params = (id => $deviceid);

    mon_cmd($vmid, "netdev_del", @params);
}
4627
# Replace USB device $deviceid in the running VM with the one described by $device.
#
# Does nothing if $device is false. Otherwise the old device is unplugged, an 'xhci'
# controller is added if the VM's device list has none, and the new device is plugged.
sub qemu_usb_hotplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    return if !$device;

    # get rid of the previous device first
    vm_deviceunplug($vmid, $conf, $deviceid);

    # make sure there is an xhci controller to attach to
    my $plugged = vm_devices_list($vmid);
    if (!$plugged->{xhci}) {
        my $xhci_addr = print_pci_addr("xhci", undef, $arch, $machine_type);
        qemu_deviceadd($vmid, PVE::QemuServer::USB::print_qemu_xhci_controller($xhci_addr));
    }

    # then plug the new device
    vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type);
}
4647
# Hot-plug or hot-unplug vCPUs so that the running VM ends up with $vcpus of them.
#
# $vmid  - ID of the VM
# $conf  - VM configuration; 'vcpus' is updated and written back after each successfully
#          plugged or unplugged CPU
# $vcpus - target vCPU count; a false value means the maximum (sockets * cores)
#
# Dies if $vcpus exceeds the maximum, if the number of running vCPUs does not match the
# configuration before plugging, or if unplugging is requested on a machine version older
# than 2.7. Raises a parameter exception if a CPU does not appear or disappear in time.
sub qemu_cpu_hotplug {
    my ($vmid, $conf, $vcpus) = @_;

    my $machine_type = PVE::QemuServer::Machine::get_current_qemu_machine($vmid);

    # 'smp' is the old style socket count and no longer used, 'sockets' takes precedence
    my $sockets = $conf->{sockets} || $conf->{smp} || 1;
    my $cores = $conf->{cores} || 1;
    my $maxcpus = $sockets * $cores;

    $vcpus ||= $maxcpus;

    die "you can't add more vcpus than maxcpus\n"
        if $vcpus > $maxcpus;

    my $currentvcpus = $conf->{vcpus} || $maxcpus;

    # Poll 'query-cpus-fast' once per second until it reports $expected CPUs and return that
    # list; raise a parameter exception once more than $max_retries retries were needed.
    my $wait_for_vcpu_count = sub {
        my ($expected, $max_retries, $errmsg) = @_;

        my $retries = 0;
        while (1) {
            my $running = mon_cmd($vmid, "query-cpus-fast");
            return $running if scalar(@{$running}) == $expected;
            raise_param_exc({ vcpus => $errmsg }) if $retries > $max_retries;
            $retries++;
            sleep 1;
        }
    };

    if ($vcpus < $currentvcpus) {
        die "cpu hot-unplugging requires qemu version 2.7 or higher\n"
            if !PVE::QemuServer::Machine::machine_version($machine_type, 2, 7);

        # unplug from the highest CPU downwards
        for my $i (reverse(($vcpus + 1) .. $currentvcpus)) {
            qemu_devicedel($vmid, "cpu$i");
            my $running = $wait_for_vcpu_count->($i - 1, 5, "error unplugging cpu$i");

            # update conf after each successful cpu unplug
            $conf->{vcpus} = scalar(@{$running});
            PVE::QemuConfig->write_config($vmid, $conf);
        }

        return;
    }

    my $running_before = mon_cmd($vmid, "query-cpus-fast");
    die "vcpus in running vm does not match its configuration\n"
        if scalar(@{$running_before}) != $currentvcpus;

    if (PVE::QemuServer::Machine::machine_version($machine_type, 2, 7)) {
        for my $i (($currentvcpus + 1) .. $vcpus) {
            my $cpustr = print_cpu_device($conf, $i);
            qemu_deviceadd($vmid, $cpustr);
            my $running = $wait_for_vcpu_count->($i, 10, "error hotplugging cpu$i");

            # update conf after each successful cpu hotplug
            $conf->{vcpus} = scalar(@{$running});
            PVE::QemuConfig->write_config($vmid, $conf);
        }
    } else {
        # older machine versions use the 'cpu-add' command with zero-based CPU ids
        for (my $i = $currentvcpus; $i < $vcpus; $i++) {
            mon_cmd($vmid, "cpu-add", id => int($i));
        }
    }
}
4722
# Apply I/O throttling limits to block device $deviceid of a running VM.
#
# Arguments after $vmid and $deviceid are positional, in this order:
#   bps, bps_rd, bps_wr, iops, iops_rd, iops_wr,
#   bps_max, bps_rd_max, bps_wr_max, iops_max, iops_rd_max, iops_wr_max,
#   bps_max_length, bps_rd_max_length, bps_wr_max_length,
#   iops_max_length, iops_rd_max_length, iops_wr_max_length
#
# Each value is truncated to an integer and sent under the same name with the
# 'block_set_io_throttle' monitor command. Does nothing if the VM is not running.
sub qemu_block_set_io_throttle {
    my ($vmid, $deviceid, @limits) = @_;

    return if !check_running($vmid);

    # monitor argument names, in the order the values are passed to this function
    my @limit_names = qw(
        bps bps_rd bps_wr
        iops iops_rd iops_wr
        bps_max bps_rd_max bps_wr_max
        iops_max iops_rd_max iops_wr_max
        bps_max_length bps_rd_max_length bps_wr_max_length
        iops_max_length iops_rd_max_length iops_wr_max_length
    );

    my @throttle = map { ($limit_names[$_], int($limits[$_])) } 0 .. $#limit_names;

    mon_cmd($vmid, "block_set_io_throttle", device => $deviceid, @throttle);
}
4754
# Resize volume $volid to $size and, if the VM is running, tell QEMU about the new size.
#
# The storage layer is always asked to resize the volume. For a running VM the size is then
# rounded up to the next multiple of 1024 and sent with the 'block_resize' monitor command.
sub qemu_block_resize {
    my ($vmid, $deviceid, $storecfg, $volid, $size) = @_;

    my $running = check_running($vmid);

    PVE::Storage::volume_resize($storecfg, $volid, $size, $running);

    return if !$running;

    # pad up to a multiple of 1024; the outer modulo keeps an already aligned size unchanged
    my $remainder = $size % 1024;
    my $padding = (1024 - $remainder) % 1024;
    my $padded_size = $size + $padding;

    my @params = (
        device => $deviceid,
        size => int($padded_size),
        timeout => 60,
    );

    mon_cmd($vmid, "block_resize", @params);
}
4775
# Create snapshot $snap of volume $volid.
#
# If the VM is running and do_snapshots_with_qemu() says so, the snapshot is taken through
# the 'blockdev-snapshot-internal-sync' monitor command; otherwise through the storage layer.
sub qemu_volume_snapshot {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $via_qemu = check_running($vmid) && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    return mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap)
        if $via_qemu;

    return PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
}
4787
# Delete snapshot $snap of volume $volid.
#
# The volume only counts as "in use" if the VM is running and the current configuration
# references $volid. In that case, and if do_snapshots_with_qemu() says so, the snapshot is
# deleted through the 'blockdev-snapshot-delete-internal-sync' monitor command; otherwise
# through the storage layer, which is told whether the volume is in use.
sub qemu_volume_snapshot_delete {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $in_use = check_running($vmid);

    if ($in_use) {
        # the VM runs, but that only matters if it actually uses this volume
        $in_use = undef;

        my $current_conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->foreach_volume($current_conf, sub {
            my ($ds, $drive) = @_;
            $in_use = 1 if $drive->{file} eq $volid;
        });
    }

    my $via_qemu = $in_use && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    return mon_cmd($vmid, 'blockdev-snapshot-delete-internal-sync', device => $deviceid, name => $snap)
        if $via_qemu;

    return PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $in_use);
}
4809
# Set the migration capabilities of a running VM.
#
# Every capability reported by 'query-migrate-capabilities' is explicitly switched on or
# off: 'auto-converge' and 'xbzrle' are enabled, 'dirty-bitmaps' follows what
# 'query-proxmox-support' reports for savevm ($savevm true) or migration, and everything
# else is disabled. A failing 'query-proxmox-support' is ignored and treated as unsupported.
sub set_migration_caps {
    my ($vmid, $savevm) = @_;

    my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };

    my $bitmap_prop = $savevm ? 'pbs-dirty-bitmap-savevm' : 'pbs-dirty-bitmap-migration';

    my $wanted = {
        "auto-converge" => 1,
        "xbzrle" => 1,
        "x-rdma-pin-all" => 0,
        "zero-blocks" => 0,
        "compress" => 0,
        "dirty-bitmaps" => $qemu_support->{$bitmap_prop} ? 1 : 0,
    };

    my $supported = mon_cmd($vmid, "query-migrate-capabilities");

    # capabilities missing from $wanted end up disabled
    my @capabilities = map {
        my $name = $_->{capability};
        +{
            capability => $name,
            state => $wanted->{$name} ? JSON::true : JSON::false,
        };
    } @{ $supported // [] };

    mon_cmd($vmid, "migrate-set-capabilities", capabilities => \@capabilities);
}
4840
# Call $func once for every distinct volume referenced by $conf or one of its snapshots.
#
# Volumes are collected from the current configuration and from every snapshot, including
# the 'vmstate' key and unused volumes. $func is called as
#   $func->($volid, $attributes, @param)
# where $attributes is a hash with these keys:
#   cdrom                  - 1 only if every reference to the volume is a cdrom drive
#   replicate              - 1 if any reference has replication enabled (default: enabled)
#   shared                 - 1 if any reference is marked as shared
#   referenced_in_config   - 1 if the current configuration references the volume
#   referenced_in_snapshot - hash of snapshot names referencing the volume (only if any)
#   size                   - first non-zero size seen (only if any)
#   is_vmstate, is_tpmstate, is_unused
#                          - 1 if referenced under 'vmstate', 'tpmstate0' or 'unused<N>'
#   drivename              - last valid drive name referencing the volume (only if any)
sub foreach_volid {
    my ($conf, $func, @param) = @_;

    my $volhash = {};

    my $record_volume = sub {
        my ($key, $drive, $snapname) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $info = ($volhash->{$volid} //= {});

        # starts out as cdrom and is cleared by the first non-cdrom reference
        $info->{cdrom} //= 1;
        $info->{cdrom} = 0 if !drive_is_cdrom($drive);

        # these start out as 0 and stay 1 once any reference sets them
        my @sticky_flags = (
            [replicate => $drive->{replicate} // 1],
            [shared => $drive->{shared}],
            [referenced_in_config => !defined($snapname)],
            [is_vmstate => $key eq 'vmstate'],
            [is_tpmstate => $key eq 'tpmstate0'],
            [is_unused => $key =~ /^unused\d+$/],
        );
        for my $flag (@sticky_flags) {
            my ($name, $is_set) = @$flag;
            $info->{$name} = ($info->{$name} || $is_set) ? 1 : 0;
        }

        $info->{referenced_in_snapshot}->{$snapname} = 1
            if defined($snapname);

        my $size = $drive->{size};
        $info->{size} //= $size if $size;

        $info->{drivename} = $key if is_valid_drivename($key);
    };

    my $include_opts = {
        extra_keys => ['vmstate'],
        include_unused => 1,
    };

    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $record_volume);

    my $snapshots = $conf->{snapshots};
    for my $snapname (keys %{$snapshots}) {
        PVE::QemuConfig->foreach_volume_full(
            $snapshots->{$snapname}, $include_opts, $record_volume, $snapname);
    }

    for my $volid (keys %$volhash) {
        $func->($volid, $volhash->{$volid}, @param);
    }
}
4898
# Options listed here are applied by vmconfig_hotplug_pending() by simply moving the
# pending value into the configuration.
my $fast_plug_option = {
    map { $_ => 1 } qw(
        description
        hookscript
        lock
        migrate_downtime
        migrate_speed
        name
        onboot
        protection
        shares
        startup
        tags
        vmstatestorage
    )
};

# every cloud-init option is handled the same way
$fast_plug_option->{$_} = 1 for keys %$confdesc_cloudinit;
4917
4918 # hotplug changes in [PENDING]
4919 # $selection hash can be used to only apply specified options, for
4920 # example: { cores => 1 } (only apply changed 'cores')
4921 # $errors ref is used to return error messages
4922 sub vmconfig_hotplug_pending {
4923 my ($vmid, $conf, $storecfg, $selection, $errors) = @_;
4924
4925 my $defaults = load_defaults();
4926 my $arch = get_vm_arch($conf);
4927 my $machine_type = get_vm_machine($conf, undef, $arch);
4928
4929 # commit values which do not have any impact on running VM first
4930 # Note: those option cannot raise errors, we we do not care about
4931 # $selection and always apply them.
4932
4933 my $add_error = sub {
4934 my ($opt, $msg) = @_;
4935 $errors->{$opt} = "hotplug problem - $msg";
4936 };
4937
4938 my $cloudinit_pending_properties = PVE::QemuServer::cloudinit_pending_properties();
4939
4940 my $cloudinit_record_changed = sub {
4941 my ($conf, $opt, $old, $new) = @_;
4942 return if !$cloudinit_pending_properties->{$opt};
4943
4944 my $ci = ($conf->{cloudinit} //= {});
4945
4946 my $recorded = $ci->{$opt};
4947 my %added = map { $_ => 1 } PVE::Tools::split_list(delete($ci->{added}) // '');
4948
4949 if (defined($new)) {
4950 if (defined($old)) {
4951 # an existing value is being modified
4952 if (defined($recorded)) {
4953 # the value was already not in sync
4954 if ($new eq $recorded) {
4955 # a value is being reverted to the cloud-init state:
4956 delete $ci->{$opt};
4957 delete $added{$opt};
4958 } else {
4959 # the value was changed multiple times, do nothing
4960 }
4961 } elsif ($added{$opt}) {
4962 # the value had been marked as added and is being changed, do nothing
4963 } else {
4964 # the value is new, record it:
4965 $ci->{$opt} = $old;
4966 }
4967 } else {
4968 # a new value is being added
4969 if (defined($recorded)) {
4970 # it was already not in sync
4971 if ($new eq $recorded) {
4972 # a value is being reverted to the cloud-init state:
4973 delete $ci->{$opt};
4974 delete $added{$opt};
4975 } else {
4976 # the value had temporarily been removed, do nothing
4977 }
4978 } elsif ($added{$opt}) {
4979 # the value had been marked as added already, do nothing
4980 } else {
4981 # the value is new, add it
4982 $added{$opt} = 1;
4983 }
4984 }
4985 } elsif (!defined($old)) {
4986 # a non-existent value is being removed? ignore...
4987 } else {
4988 # a value is being deleted
4989 if (defined($recorded)) {
4990 # a value was already recorded, just keep it
4991 } elsif ($added{$opt}) {
4992 # the value was marked as added, remove it
4993 delete $added{$opt};
4994 } else {
4995 # a previously unrecorded value is being removed, record the old value:
4996 $ci->{$opt} = $old;
4997 }
4998 }
4999
5000 my $added = join(',', sort keys %added);
5001 $ci->{added} = $added if length($added);
5002 };
5003
5004 my $changes = 0;
5005 foreach my $opt (keys %{$conf->{pending}}) { # add/change
5006 if ($fast_plug_option->{$opt}) {
5007 my $new = delete $conf->{pending}->{$opt};
5008 $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $new);
5009 $conf->{$opt} = $new;
5010 $changes = 1;
5011 }
5012 }
5013
5014 if ($changes) {
5015 PVE::QemuConfig->write_config($vmid, $conf);
5016 }
5017
5018 my $ostype = $conf->{ostype};
5019 my $version = extract_version($machine_type, get_running_qemu_version($vmid));
5020 my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
5021 my $usb_hotplug = $hotplug_features->{usb}
5022 && min_version($version, 7, 1)
5023 && defined($ostype) && ($ostype eq 'l26' || windows_version($ostype) > 7);
5024
5025 my $cgroup = PVE::QemuServer::CGroup->new($vmid);
5026 my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
5027
5028 foreach my $opt (sort keys %$pending_delete_hash) {
5029 next if $selection && !$selection->{$opt};
5030 my $force = $pending_delete_hash->{$opt}->{force};
5031 eval {
5032 if ($opt eq 'hotplug') {
5033 die "skip\n" if ($conf->{hotplug} =~ /memory/);
5034 } elsif ($opt eq 'tablet') {
5035 die "skip\n" if !$hotplug_features->{usb};
5036 if ($defaults->{tablet}) {
5037 vm_deviceplug($storecfg, $conf, $vmid, 'tablet', $arch, $machine_type);
5038 vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', $arch, $machine_type)
5039 if $arch eq 'aarch64';
5040 } else {
5041 vm_deviceunplug($vmid, $conf, 'tablet');
5042 vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
5043 }
5044 } elsif ($opt =~ m/^usb(\d+)$/) {
5045 my $index = $1;
5046 die "skip\n" if !$usb_hotplug;
5047 vm_deviceunplug($vmid, $conf, "usbredirdev$index"); # if it's a spice port
5048 vm_deviceunplug($vmid, $conf, $opt);
5049 } elsif ($opt eq 'vcpus') {
5050 die "skip\n" if !$hotplug_features->{cpu};
5051 qemu_cpu_hotplug($vmid, $conf, undef);
5052 } elsif ($opt eq 'balloon') {
5053 # enable balloon device is not hotpluggable
5054 die "skip\n" if defined($conf->{balloon}) && $conf->{balloon} == 0;
5055 # here we reset the ballooning value to memory
5056 my $balloon = $conf->{memory} || $defaults->{memory};
5057 mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
5058 } elsif ($fast_plug_option->{$opt}) {
5059 # do nothing
5060 } elsif ($opt =~ m/^net(\d+)$/) {
5061 die "skip\n" if !$hotplug_features->{network};
5062 vm_deviceunplug($vmid, $conf, $opt);
5063 } elsif (is_valid_drivename($opt)) {
5064 die "skip\n" if !$hotplug_features->{disk} || $opt =~ m/(ide|sata)(\d+)/;
5065 vm_deviceunplug($vmid, $conf, $opt);
5066 vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
5067 } elsif ($opt =~ m/^memory$/) {
5068 die "skip\n" if !$hotplug_features->{memory};
5069 PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults);
5070 } elsif ($opt eq 'cpuunits') {
5071 $cgroup->change_cpu_shares(undef);
5072 } elsif ($opt eq 'cpulimit') {
5073 $cgroup->change_cpu_quota(undef, undef); # reset, cgroup module can better decide values
5074 } else {
5075 die "skip\n";
5076 }
5077 };
5078 if (my $err = $@) {
5079 &$add_error($opt, $err) if $err ne "skip\n";
5080 } else {
5081 my $old = delete $conf->{$opt};
5082 $cloudinit_record_changed->($conf, $opt, $old, undef);
5083 PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
5084 }
5085 }
5086
5087 my $cloudinit_opt;
5088 foreach my $opt (keys %{$conf->{pending}}) {
5089 next if $selection && !$selection->{$opt};
5090 my $value = $conf->{pending}->{$opt};
5091 eval {
5092 if ($opt eq 'hotplug') {
5093 die "skip\n" if ($value =~ /memory/) || ($value !~ /memory/ && $conf->{hotplug} =~ /memory/);
5094 } elsif ($opt eq 'tablet') {
5095 die "skip\n" if !$hotplug_features->{usb};
5096 if ($value == 1) {
5097 vm_deviceplug($storecfg, $conf, $vmid, 'tablet', $arch, $machine_type);
5098 vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', $arch, $machine_type)
5099 if $arch eq 'aarch64';
5100 } elsif ($value == 0) {
5101 vm_deviceunplug($vmid, $conf, 'tablet');
5102 vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
5103 }
5104 } elsif ($opt =~ m/^usb(\d+)$/) {
5105 my $index = $1;
5106 die "skip\n" if !$usb_hotplug;
5107 my $d = eval { parse_property_string('pve-qm-usb', $value) };
5108 my $id = $opt;
5109 if ($d->{host} =~ m/^spice$/i) {
5110 $id = "usbredirdev$index";
5111 }
5112 qemu_usb_hotplug($storecfg, $conf, $vmid, $id, $d, $arch, $machine_type);
5113 } elsif ($opt eq 'vcpus') {
5114 die "skip\n" if !$hotplug_features->{cpu};
5115 qemu_cpu_hotplug($vmid, $conf, $value);
5116 } elsif ($opt eq 'balloon') {
5117 # enable/disable balloning device is not hotpluggable
5118 my $old_balloon_enabled = !!(!defined($conf->{balloon}) || $conf->{balloon});
5119 my $new_balloon_enabled = !!(!defined($conf->{pending}->{balloon}) || $conf->{pending}->{balloon});
5120 die "skip\n" if $old_balloon_enabled != $new_balloon_enabled;
5121
5122 # allow manual ballooning if shares is set to zero
5123 if ((defined($conf->{shares}) && ($conf->{shares} == 0))) {
5124 my $balloon = $conf->{pending}->{balloon} || $conf->{memory} || $defaults->{memory};
5125 mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
5126 }
5127 } elsif ($opt =~ m/^net(\d+)$/) {
5128 # some changes can be done without hotplug
5129 vmconfig_update_net($storecfg, $conf, $hotplug_features->{network},
5130 $vmid, $opt, $value, $arch, $machine_type);
5131 } elsif (is_valid_drivename($opt)) {
5132 die "skip\n" if $opt eq 'efidisk0' || $opt eq 'tpmstate0';
5133 # some changes can be done without hotplug
5134 my $drive = parse_drive($opt, $value);
5135 if (drive_is_cloudinit($drive)) {
5136 $cloudinit_opt = [$opt, $drive];
5137 # apply all the other changes first, then generate the cloudinit disk
5138 die "skip\n";
5139 }
5140 vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
5141 $vmid, $opt, $value, $arch, $machine_type);
5142 } elsif ($opt =~ m/^memory$/) { #dimms
5143 die "skip\n" if !$hotplug_features->{memory};
5144 $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $value);
5145 } elsif ($opt eq 'cpuunits') {
5146 my $new_cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{pending}->{$opt}); #clamp
5147 $cgroup->change_cpu_shares($new_cpuunits);
5148 } elsif ($opt eq 'cpulimit') {
5149 my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000);
5150 $cgroup->change_cpu_quota($cpulimit, 100000);
5151 } elsif ($opt eq 'agent') {
5152 vmconfig_update_agent($conf, $opt, $value);
5153 } else {
5154 die "skip\n"; # skip non-hot-pluggable options
5155 }
5156 };
5157 if (my $err = $@) {
5158 &$add_error($opt, $err) if $err ne "skip\n";
5159 } else {
5160 $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $value);
5161 $conf->{$opt} = $value;
5162 delete $conf->{pending}->{$opt};
5163 }
5164 }
5165
5166 if (defined($cloudinit_opt)) {
5167 my ($opt, $drive) = @$cloudinit_opt;
5168 my $value = $conf->{pending}->{$opt};
5169 eval {
5170 my $temp = {%$conf, $opt => $value};
5171 PVE::QemuServer::Cloudinit::apply_cloudinit_config($temp, $vmid);
5172 vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
5173 $vmid, $opt, $value, $arch, $machine_type);
5174 };
5175 if (my $err = $@) {
5176 &$add_error($opt, $err) if $err ne "skip\n";
5177 } else {
5178 $conf->{$opt} = $value;
5179 delete $conf->{pending}->{$opt};
5180 }
5181 }
5182
5183 # unplug xhci controller if no usb device is left
5184 if ($usb_hotplug) {
5185 my $has_usb = 0;
5186 for (my $i = 0; $i < $PVE::QemuServer::USB::MAX_USB_DEVICES; $i++) {
5187 next if !defined($conf->{"usb$i"});
5188 $has_usb = 1;
5189 last;
5190 }
5191 if (!$has_usb) {
5192 vm_deviceunplug($vmid, $conf, 'xhci');
5193 }
5194 }
5195
5196 PVE::QemuConfig->write_config($vmid, $conf);
5197
5198 if ($hotplug_features->{cloudinit} && PVE::QemuServer::Cloudinit::has_changes($conf)) {
5199 PVE::QemuServer::vmconfig_update_cloudinit_drive($storecfg, $conf, $vmid);
5200 }
5201 }
5202
# Free the volume behind a drive entry, if it is a candidate for deallocation.
#
# Only acts when $force is set or $key names an 'unused' entry, and the drive
# is not a CD-ROM. Returns 1 when the volume was freed or when the VM does not
# own it (so only the config entry needs to go); returns nothing otherwise.
# Dies if the volume is still in use; the permission check via $rpcenv->check
# is presumably fatal on failure as well (its implementation is not visible
# here).
sub try_deallocate_drive {
    my ($storecfg, $vmid, $conf, $key, $drive, $rpcenv, $authuser, $force) = @_;

    # neither forced nor an 'unused' entry: nothing to deallocate
    return if !$force && $key !~ /^unused/;
    return if drive_is_cdrom($drive, 1);

    my $volid = $drive->{file};

    # If vm is not owner of this disk remove from config
    return 1 if !vm_is_volid_owner($storecfg, $vmid, $volid);

    # scalar context on purpose: only the storage ID is wanted here
    my $sid = PVE::Storage::parse_volume_id($volid);
    $rpcenv->check($authuser, "/storage/$sid", ['Datastore.AllocateSpace']);

    # check if the disk is really unused
    if (PVE::QemuServer::Drive::is_volume_in_use($storecfg, $conf, $key, $volid)) {
        die "unable to delete '$volid' - volume is still in use (snapshot?)\n";
    }

    PVE::Storage::vdisk_free($storecfg, $volid);
    return 1;
}
5225
# Remove the drive stored under $opt from a VM.
#
# With $force the caller needs 'VM.Config.Disk' on the VM and the volume is
# handed to try_deallocate_drive(); without $force the drive is only
# registered as an unused drive in $conf.
sub vmconfig_delete_or_detach_drive {
    my ($vmid, $storecfg, $conf, $opt, $force) = @_;

    my $drive = parse_drive($opt, $conf->{$opt});

    # the RPC environment and user are looked up in both cases
    my $rpcenv = PVE::RPCEnvironment::get();
    my $authuser = $rpcenv->get_user();

    # detach only: keep the volume around as an unused drive
    return vmconfig_register_unused_drive($storecfg, $vmid, $conf, $drive) if !$force;

    $rpcenv->check_vm_perm($authuser, $vmid, undef, ['VM.Config.Disk']);
    return try_deallocate_drive($storecfg, $vmid, $conf, $opt, $drive, $rpcenv, $authuser, $force);
}
5241
5242
5243
# Apply all pending changes of $conf directly to the config (cold plug) and
# write the result out.
#
# $errors          - optional hash ref; receives one message per option that
#                    could not be applied (the message is also emitted via warn)
# $skip_cloud_init - if true, the cloud-init config is not regenerated even
#                    when a cloud-init drive is among the pending changes
#
# Returns early without touching anything if there are no pending entries.
sub vmconfig_apply_pending {
    my ($vmid, $conf, $storecfg, $errors, $skip_cloud_init) = @_;

    return if !scalar(keys %{$conf->{pending}});

    my $add_apply_error = sub {
        my ($opt, $msg) = @_;
        my $err_msg = "unable to apply pending change $opt : $msg";
        $errors->{$opt} = $err_msg;
        warn $err_msg;
    };

    # cold plug

    # first handle pending deletions
    my $to_delete = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    for my $opt (sort keys %$to_delete) {
        my $force = $to_delete->{$opt}->{force};
        eval {
            die "internal error" if $opt =~ m/^unused/;
            vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force)
                if defined($conf->{$opt}) && is_valid_drivename($opt);
        };
        if (my $err = $@) {
            $add_apply_error->($opt, $err);
            next;
        }

        PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        delete $conf->{$opt};
    }

    PVE::QemuConfig->cleanup_pending($conf);

    # 0 = never generate, undef = not decided yet, 1 = generate
    my $generate_cloudinit = $skip_cloud_init ? 0 : undef;

    for my $opt (keys %{$conf->{pending}}) { # add/change
        next if $opt eq 'delete'; # just to be sure

        # a drive that is being replaced is kept as unused drive
        eval {
            vmconfig_register_unused_drive($storecfg, $vmid, $conf, parse_drive($opt, $conf->{$opt}))
                if defined($conf->{$opt}) && is_valid_drivename($opt);
        };
        if (my $err = $@) {
            $add_apply_error->($opt, $err);
            next;
        }

        if (is_valid_drivename($opt)) {
            my $drive = parse_drive($opt, $conf->{pending}->{$opt});
            $generate_cloudinit //= 1 if drive_is_cloudinit($drive);
        }

        $conf->{$opt} = delete $conf->{pending}->{$opt};
    }

    # write all changes at once to avoid unnecessary i/o
    PVE::QemuConfig->write_config($vmid, $conf);

    # After successful generation and if there were changes to be applied, update the
    # config to drop the {cloudinit} entry.
    if ($generate_cloudinit && PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid)) {
        PVE::QemuConfig->write_config($vmid, $conf);
    }
}
5310
# Apply a new value for the network option $opt (netX).
#
# If an old value exists and only bridge/tag/trunks/firewall, rate or
# link_down differ, the tap interface / link state is updated in place and 1 is
# returned. Otherwise the old device (if any) is unplugged and the new one is
# plugged via vm_deviceplug(); this path dies with "skip\n" when $hotplug is
# not set.
sub vmconfig_update_net {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $newnet = parse_net($value);

    if ($conf->{$opt}) {
        my $oldnet = parse_net($conf->{$opt});

        # a change to any of these cannot be done online on the existing device
        my $device_changed =
            safe_string_ne($oldnet->{model}, $newnet->{model})
            || safe_string_ne($oldnet->{macaddr}, $newnet->{macaddr})
            || safe_num_ne($oldnet->{queues}, $newnet->{queues})
            || safe_num_ne($oldnet->{mtu}, $newnet->{mtu})
            || !($newnet->{bridge} && $oldnet->{bridge}); # bridge/nat mode change

        if (!$device_changed) {
            my ($index) = $opt =~ m/net(\d+)/
                or die "internal error";
            my $iface = "tap${vmid}i${index}";

            my $tap_changed =
                safe_string_ne($oldnet->{bridge}, $newnet->{bridge})
                || safe_num_ne($oldnet->{tag}, $newnet->{tag})
                || safe_string_ne($oldnet->{trunks}, $newnet->{trunks})
                || safe_num_ne($oldnet->{firewall}, $newnet->{firewall});

            if ($tap_changed) {
                PVE::Network::tap_unplug($iface);

                my @plug_args = (
                    $iface,
                    $newnet->{bridge},
                    $newnet->{tag},
                    $newnet->{firewall},
                    $newnet->{trunks},
                    $newnet->{rate},
                );
                if ($have_sdn) {
                    PVE::Network::SDN::Zones::tap_plug(@plug_args);
                } else {
                    PVE::Network::tap_plug(@plug_args);
                }
            } elsif (safe_num_ne($oldnet->{rate}, $newnet->{rate})) {
                # Rate can be applied on its own but any change above needs to
                # include the rate in tap_plug since OVS resets everything.
                PVE::Network::tap_rate_limit($iface, $newnet->{rate});
            }

            qemu_set_link_status($vmid, $opt, !$newnet->{link_down})
                if safe_string_ne($oldnet->{link_down}, $newnet->{link_down});

            return 1;
        }

        # for non online change, we try to hot-unplug
        die "skip\n" if !$hotplug;
        vm_deviceunplug($vmid, $conf, $opt);
    }

    die "skip\n" if !$hotplug;
    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet, $arch, $machine_type);
}
5364
# Check whether a new value for the agent option can be taken over as is.
#
# Dies with "skip\n" if the option is not currently set, or if any sub-option
# other than the ones listed in $hotplug_options differs between the old and
# the new value. Returns nothing otherwise.
sub vmconfig_update_agent {
    my ($conf, $opt, $value) = @_;

    die "skip\n" if !$conf->{$opt};

    my $hotplug_options = { fstrim_cloned_disks => 1 };

    my $old_agent = parse_guest_agent($conf);
    my $agent = parse_guest_agent({$opt => $value});

    # first pass: added/changed options, second pass: removed options
    for my $direction ([$agent, $old_agent], [$old_agent, $agent]) {
        my ($from, $to) = @$direction;
        for my $option (keys %$from) {
            next if defined($hotplug_options->{$option});
            die "skip\n" if safe_string_ne($from->{$option}, $to->{$option});
        }
    }

    return; # either no actual change (e.g., format string reordered) or just hotpluggable changes
}
5387
# Apply a new value for the drive option $opt.
#
# With an existing drive under $opt:
#  - changing the media type is refused,
#  - CD-ROM drives get their medium ejected and, unless the new file is 'none',
#    replaced; returns 1,
#  - a disk whose volume stays the same only gets new throttle limits (dies
#    with "skip\n" if one of the options checked below changed); returns 1,
#  - a disk whose volume changes is unplugged and registered as unused, then
#    the new volume is plugged like a new disk.
# New disks are plugged via vm_deviceplug(); this dies with "skip\n" if
# $hotplug is not set or $opt matches ide/sata.
sub vmconfig_update_disk {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $drive = parse_drive($opt, $value);

    my $old_drive = $conf->{$opt} ? parse_drive($opt, $conf->{$opt}) : undef;

    if ($old_drive) {
        my $new_media = $drive->{media} || 'disk';
        my $old_media = $old_drive->{media} || 'disk';
        die "unable to change media type\n" if $new_media ne $old_media;

        if (drive_is_cdrom($old_drive)) { # cdrom
            if ($drive->{file} eq 'none') {
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive)
                    if drive_is_cloudinit($old_drive);
            } else {
                my $path = get_iso_path($storecfg, $vmid, $drive->{file});

                # force eject if locked
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");

                mon_cmd($vmid, "blockdev-change-medium", id => "$opt", filename => "$path")
                    if $path;
            }

            return 1;
        }

        if ($drive->{file} eq $old_drive->{file}) {
            # update existing disk

            # skip non hotpluggable value
            my @cold_options = qw(aio discard iothread queues cache ssd ro);
            die "skip\n"
                if grep { safe_string_ne($drive->{$_}, $old_drive->{$_}) } @cold_options;

            my @throttle_options = qw(
                mbps mbps_rd mbps_wr
                iops iops_rd iops_wr
                mbps_max mbps_rd_max mbps_wr_max
                iops_max iops_rd_max iops_wr_max
                bps_max_length bps_rd_max_length bps_wr_max_length
                iops_max_length iops_rd_max_length iops_wr_max_length
            );

            # apply throttle
            if (grep { safe_num_ne($drive->{$_}, $old_drive->{$_}) } @throttle_options) {
                # argument order must match qemu_block_set_io_throttle's positional parameters
                qemu_block_set_io_throttle(
                    $vmid,
                    "drive-$opt",
                    (map { ($drive->{$_} || 0) * 1024 * 1024 } qw(mbps mbps_rd mbps_wr)),
                    (map { $drive->{$_} || 0 } qw(iops iops_rd iops_wr)),
                    (map { ($drive->{$_} || 0) * 1024 * 1024 } qw(mbps_max mbps_rd_max mbps_wr_max)),
                    (map { $drive->{$_} || 0 } qw(iops_max iops_rd_max iops_wr_max)),
                    (map { $drive->{$_} || 1 } qw(
                        bps_max_length bps_rd_max_length bps_wr_max_length
                        iops_max_length iops_rd_max_length iops_wr_max_length
                    )),
                );
            }

            return 1;
        }

        # a different volume: unplug and register as unused, then plug the new one below
        die "skip\n" if !$hotplug;
        vm_deviceunplug($vmid, $conf, $opt);
        vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
    }

    die "skip\n" if !$hotplug || $opt =~ m/(ide|sata)(\d+)/;

    # hotplug new disks
    PVE::Storage::activate_volumes($storecfg, [$drive->{file}]) if $drive->{file} !~ m|^/dev/.+|;
    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive, $arch, $machine_type);
}
5497
# Regenerate the cloud-init config and, if the VM is running, swap the medium
# of its cloud-init drive.
#
# Does nothing if $conf contains no cloud-init drive. If several volumes
# qualify, the one visited last by foreach_volume is used.
sub vmconfig_update_cloudinit_drive {
    my ($storecfg, $conf, $vmid) = @_;

    my ($cloudinit_ds, $cloudinit_drive);

    my $remember_cloudinit = sub {
        my ($ds, $drive) = @_;
        if (PVE::QemuServer::drive_is_cloudinit($drive)) {
            ($cloudinit_ds, $cloudinit_drive) = ($ds, $drive);
        }
    };
    PVE::QemuConfig->foreach_volume($conf, $remember_cloudinit);

    return if !$cloudinit_drive;

    # only write the config if apply_cloudinit_config reports success
    PVE::QemuConfig->write_config($vmid, $conf)
        if PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid);

    if (PVE::QemuServer::check_running($vmid)) {
        my $path = PVE::Storage::path($storecfg, $cloudinit_drive->{file});
        if ($path) {
            # eject first, then insert the path as new medium
            mon_cmd($vmid, "eject", force => JSON::true, id => "$cloudinit_ds");
            mon_cmd($vmid, "blockdev-change-medium", id => "$cloudinit_ds", filename => "$path");
        }
    }
}
5528
# called in locked context by incoming migration
#
# Collects the volumes of $conf that live on non-shared storages. CD-ROM
# drives, tpmstate0 and drives without a volume are left out.
#
# Returns a hash ref mapping the drive key to
#   [$volid, $storeid, $volname, $drive, $use_existing]
# where $use_existing is 1 if the volid is listed in $replicated_volumes.
sub vm_migrate_get_nbd_disks {
    my ($storecfg, $conf, $replicated_volumes) = @_;

    my %local_volumes;

    my $collect = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive) || $ds eq 'tpmstate0';

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid);

        # volumes on shared storages are skipped
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        return if $scfg->{shared};

        # replicated disks re-use existing state via bitmap
        my $use_existing = 0;
        $use_existing = 1 if $replicated_volumes->{$volid};

        $local_volumes{$ds} = [$volid, $storeid, $volname, $drive, $use_existing];
    };

    PVE::QemuConfig->foreach_volume($conf, $collect);

    return \%local_volumes;
}
5555
# called in locked context by incoming migration
#
# Allocates the target volumes for the given source volumes.
#
# $source_volumes maps a drive key to
#   [$volid, $storeid, $volname, $drive, $use_existing, $format]
# ($format is optional). Entries with $use_existing set are not allocated
# again; their existing volid is reported back with 'replicated' set.
#
# Returns a hash ref mapping each drive key to
#   { drivestr => ..., volid => ..., replicated => 1 (only for re-used volumes) }
# The drive hashes passed in via $source_volumes are not modified.
sub vm_migrate_alloc_nbd_disks {
    my ($storecfg, $vmid, $source_volumes, $storagemap) = @_;

    my $nbd = {};
    foreach my $opt (sort keys %$source_volumes) {
        my ($volid, $storeid, $volname, $drive, $use_existing, $format) = @{$source_volumes->{$opt}};

        if ($use_existing) {
            $nbd->{$opt}->{drivestr} = print_drive($drive);
            $nbd->{$opt}->{volid} = $volid;
            $nbd->{$opt}->{replicated} = 1;
            next;
        }

        # storage mapping + volname = regular migration
        # storage mapping + format = remote migration
        # order of precedence, filtered by whether storage supports it:
        # 1. explicit requested format
        # 2. format of current volume
        # 3. default format of storage
        if (!$storagemap->{identity}) {
            $storeid = PVE::JSONSchema::map_id($storagemap, $storeid);
            my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
            if (!$format || !grep { $format eq $_ } @$validFormats) {
                # Forget a requested format the target storage cannot provide, otherwise the
                # defined-or fallback below would never reach the storage default.
                $format = undef;

                if ($volname) {
                    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
                    my $fileFormat = qemu_img_format($scfg, $volname);
                    $format = $fileFormat
                        if grep { $fileFormat eq $_ } @$validFormats;
                }
                $format //= $defFormat;
            }
        } else {
            # can't happen for remote migration, so $volname is always defined
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $format = qemu_img_format($scfg, $volname);
        }

        # NOTE(review): the division suggests $drive->{size} is in bytes and vdisk_alloc
        # expects KiB - confirm against PVE::Storage
        my $size = $drive->{size} / 1024;
        my $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $format, undef, $size);

        # work on a shallow copy so the caller's drive hash keeps its original format/file
        my $newdrive = { %$drive };
        $newdrive->{format} = $format;
        $newdrive->{file} = $newvolid;

        $nbd->{$opt}->{drivestr} = print_drive($newdrive);
        $nbd->{$opt}->{volid} = $newvolid;
    }

    return $nbd;
}
5607
# see vm_start_nolock for parameters, additionally:
# migrate_opts:
#   storagemap = parsed storage map for allocating NBD disks
#
# Runs under the VM's config lock. Refuses to start templates (unless
# $params->{skiptemplate}) and already running VMs; a running VM that holds a
# 'backup' lock is resumed instead. Sets $params->{resume} and, when a
# storagemap is given, $migrate_opts->{nbd} before handing over to
# vm_start_nolock().
sub vm_start {
    my ($storecfg, $vmid, $params, $migrate_opts) = @_;

    my $start_locked = sub {
        my $migratedfrom = $migrate_opts->{migratedfrom};
        my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);

        if (!$params->{skiptemplate} && PVE::QemuConfig->is_template($conf)) {
            die "you can't start a vm if it's a template\n";
        }

        my $has_suspended_lock = PVE::QemuConfig->has_lock($conf, 'suspended');
        my $has_backup_lock = PVE::QemuConfig->has_lock($conf, 'backup');

        my $running = check_running($vmid, undef, $migrate_opts->{migratedfrom});

        # a backup is currently running, attempt to start the guest in the
        # existing QEMU instance
        return vm_resume($vmid) if $has_backup_lock && $running;

        # a 'suspended' lock does not block the start
        PVE::QemuConfig->check_lock($conf)
            if !$params->{skiplock} && !$has_suspended_lock;

        $params->{resume} = $has_suspended_lock || defined($conf->{vmstate});

        die "VM $vmid already running\n" if $running;

        my $storagemap = $migrate_opts->{storagemap};
        if ($storagemap) {
            my $replicated = $migrate_opts->{replicated_volumes};
            my $disks = vm_migrate_get_nbd_disks($storecfg, $conf, $replicated);
            my $nbd = vm_migrate_alloc_nbd_disks($storecfg, $vmid, $disks, $storagemap);
            $migrate_opts->{nbd} = $nbd;

            # let the config used for starting point at the allocated target volumes
            for my $opt (keys %$nbd) {
                $conf->{$opt} = $nbd->{$opt}->{drivestr};
            }
        }

        return vm_start_nolock($storecfg, $vmid, $conf, $params, $migrate_opts);
    };

    return PVE::QemuConfig->lock_config($vmid, $start_locked);
}
5651
5652
5653 # params:
5654 # statefile => 'tcp', 'unix' for migration or path/volid for RAM state
5655 # skiplock => 0/1, skip checking for config lock
5656 # skiptemplate => 0/1, skip checking whether VM is template
5657 # forcemachine => to force QEMU machine (rollback/migration)
5658 # forcecpu => a QEMU '-cpu' argument string to override get_cpu_options
5659 # timeout => in seconds
5660 # paused => start VM in paused state (backup)
5661 # resume => resume from hibernation
5662 # pbs-backing => {
5663 # sata0 => {
5664 # repository
5665 # snapshot
5666 # keyfile
5667 # archive
5668 # },
5669 # virtio2 => ...
5670 # }
5671 # migrate_opts:
5672 # nbd => volumes for NBD exports (vm_migrate_alloc_nbd_disks)
5673 # migratedfrom => source node
5674 # spice_ticket => used for spice migration, passed via tunnel/stdin
5675 # network => CIDR of migration network
5676 # type => secure/insecure - tunnel over encrypted connection or plain-text
5677 # nbd_proto_version => int, 0 for TCP, 1 for UNIX
5678 # replicated_volumes => which volids should be re-used with bitmaps for nbd migration
5679 # offline_volumes => new volids of offline migrated disks like tpmstate and cloudinit, not yet
5680 # contained in config
5681 sub vm_start_nolock {
5682 my ($storecfg, $vmid, $conf, $params, $migrate_opts) = @_;
5683
5684 my $statefile = $params->{statefile};
5685 my $resume = $params->{resume};
5686
5687 my $migratedfrom = $migrate_opts->{migratedfrom};
5688 my $migration_type = $migrate_opts->{type};
5689
5690 my $res = {};
5691
5692 # clean up leftover reboot request files
5693 eval { clear_reboot_request($vmid); };
5694 warn $@ if $@;
5695
5696 if (!$statefile && scalar(keys %{$conf->{pending}})) {
5697 vmconfig_apply_pending($vmid, $conf, $storecfg);
5698 $conf = PVE::QemuConfig->load_config($vmid); # update/reload
5699 }
5700
5701 # don't regenerate the ISO if the VM is started as part of a live migration
5702 # this way we can reuse the old ISO with the correct config
5703 if (!$migratedfrom) {
5704 if (PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid)) {
5705 # FIXME: apply_cloudinit_config updates $conf in this case, and it would only drop
5706 # $conf->{cloudinit}, so we could just not do this?
5707 # But we do it above, so for now let's be consistent.
5708 $conf = PVE::QemuConfig->load_config($vmid); # update/reload
5709 }
5710 }
5711
5712 # override offline migrated volumes, conf is out of date still
5713 if (my $offline_volumes = $migrate_opts->{offline_volumes}) {
5714 for my $key (sort keys $offline_volumes->%*) {
5715 my $parsed = parse_drive($key, $conf->{$key});
5716 $parsed->{file} = $offline_volumes->{$key};
5717 $conf->{$key} = print_drive($parsed);
5718 }
5719 }
5720
5721 my $defaults = load_defaults();
5722
5723 # set environment variable useful inside network script
5724 # for remote migration the config is available on the target node!
5725 if (!$migrate_opts->{remote_node}) {
5726 $ENV{PVE_MIGRATED_FROM} = $migratedfrom;
5727 }
5728
5729 PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1);
5730
5731 my $forcemachine = $params->{forcemachine};
5732 my $forcecpu = $params->{forcecpu};
5733 if ($resume) {
5734 # enforce machine and CPU type on suspended vm to ensure HW compatibility
5735 $forcemachine = $conf->{runningmachine};
5736 $forcecpu = $conf->{runningcpu};
5737 print "Resuming suspended VM\n";
5738 }
5739
5740 my ($cmd, $vollist, $spice_port, $pci_devices) = config_to_command($storecfg, $vmid,
5741 $conf, $defaults, $forcemachine, $forcecpu, $params->{'pbs-backing'});
5742
5743 my $migration_ip;
5744 my $get_migration_ip = sub {
5745 my ($nodename) = @_;
5746
5747 return $migration_ip if defined($migration_ip);
5748
5749 my $cidr = $migrate_opts->{network};
5750
5751 if (!defined($cidr)) {
5752 my $dc_conf = PVE::Cluster::cfs_read_file('datacenter.cfg');
5753 $cidr = $dc_conf->{migration}->{network};
5754 }
5755
5756 if (defined($cidr)) {
5757 my $ips = PVE::Network::get_local_ip_from_cidr($cidr);
5758
5759 die "could not get IP: no address configured on local " .
5760 "node for network '$cidr'\n" if scalar(@$ips) == 0;
5761
5762 die "could not get IP: multiple addresses configured on local " .
5763 "node for network '$cidr'\n" if scalar(@$ips) > 1;
5764
5765 $migration_ip = @$ips[0];
5766 }
5767
5768 $migration_ip = PVE::Cluster::remote_node_ip($nodename, 1)
5769 if !defined($migration_ip);
5770
5771 return $migration_ip;
5772 };
5773
5774 if ($statefile) {
5775 if ($statefile eq 'tcp') {
5776 my $migrate = $res->{migrate} = { proto => 'tcp' };
5777 $migrate->{addr} = "localhost";
5778 my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg');
5779 my $nodename = nodename();
5780
5781 if (!defined($migration_type)) {
5782 if (defined($datacenterconf->{migration}->{type})) {
5783 $migration_type = $datacenterconf->{migration}->{type};
5784 } else {
5785 $migration_type = 'secure';
5786 }
5787 }
5788
5789 if ($migration_type eq 'insecure') {
5790 $migrate->{addr} = $get_migration_ip->($nodename);
5791 $migrate->{addr} = "[$migrate->{addr}]" if Net::IP::ip_is_ipv6($migrate->{addr});
5792 }
5793
5794 my $pfamily = PVE::Tools::get_host_address_family($nodename);
5795 $migrate->{port} = PVE::Tools::next_migrate_port($pfamily);
5796 $migrate->{uri} = "tcp:$migrate->{addr}:$migrate->{port}";
5797 push @$cmd, '-incoming', $migrate->{uri};
5798 push @$cmd, '-S';
5799
5800 } elsif ($statefile eq 'unix') {
5801 # should be default for secure migrations as a ssh TCP forward
5802 # tunnel is not deterministic reliable ready and fails regurarly
5803 # to set up in time, so use UNIX socket forwards
5804 my $migrate = $res->{migrate} = { proto => 'unix' };
5805 $migrate->{addr} = "/run/qemu-server/$vmid.migrate";
5806 unlink $migrate->{addr};
5807
5808 $migrate->{uri} = "unix:$migrate->{addr}";
5809 push @$cmd, '-incoming', $migrate->{uri};
5810 push @$cmd, '-S';
5811
5812 } elsif (-e $statefile) {
5813 push @$cmd, '-loadstate', $statefile;
5814 } else {
5815 my $statepath = PVE::Storage::path($storecfg, $statefile);
5816 push @$vollist, $statefile;
5817 push @$cmd, '-loadstate', $statepath;
5818 }
5819 } elsif ($params->{paused}) {
5820 push @$cmd, '-S';
5821 }
5822
5823 my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume);
5824
5825 my $pci_reserve_list = [];
5826 for my $device (values $pci_devices->%*) {
5827 next if $device->{mdev}; # we don't reserve for mdev devices
5828 push $pci_reserve_list->@*, map { $_->{id} } $device->{ids}->@*;
5829 }
5830
5831 # reserve all PCI IDs before actually doing anything with them
5832 PVE::QemuServer::PCI::reserve_pci_usage($pci_reserve_list, $vmid, $start_timeout);
5833
5834 eval {
5835 my $uuid;
5836 for my $id (sort keys %$pci_devices) {
5837 my $d = $pci_devices->{$id};
5838 my ($index) = ($id =~ m/^hostpci(\d+)$/);
5839
5840 my $chosen_mdev;
5841 for my $dev ($d->{ids}->@*) {
5842 my $info = eval { PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $index, $d->{mdev}) };
5843 if ($d->{mdev}) {
5844 warn $@ if $@;
5845 $chosen_mdev = $info;
5846 last if $chosen_mdev; # if successful, we're done
5847 } else {
5848 die $@ if $@;
5849 }
5850 }
5851
5852 next if !$d->{mdev};
5853 die "could not create mediated device\n" if !defined($chosen_mdev);
5854
5855 # nvidia grid needs the uuid of the mdev as qemu parameter
5856 if (!defined($uuid) && $chosen_mdev->{vendor} =~ m/^(0x)?10de$/) {
5857 if (defined($conf->{smbios1})) {
5858 my $smbios_conf = parse_smbios1($conf->{smbios1});
5859 $uuid = $smbios_conf->{uuid} if defined($smbios_conf->{uuid});
5860 }
5861 $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $index) if !defined($uuid);
5862 }
5863 }
5864 push @$cmd, '-uuid', $uuid if defined($uuid);
5865 };
5866 if (my $err = $@) {
5867 eval { cleanup_pci_devices($vmid, $conf) };
5868 warn $@ if $@;
5869 die $err;
5870 }
5871
5872 PVE::Storage::activate_volumes($storecfg, $vollist);
5873
5874 eval {
5875 run_command(['/bin/systemctl', 'stop', "$vmid.scope"], outfunc => sub{}, errfunc => sub{});
5876 };
5877 # Issues with the above 'stop' not being fully completed are extremely rare, a very low
5878 # timeout should be more than enough here...
5879 PVE::Systemd::wait_for_unit_removed("$vmid.scope", 20);
5880
5881 my $cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{cpuunits});
5882
5883 my %run_params = (
5884 timeout => $statefile ? undef : $start_timeout,
5885 umask => 0077,
5886 noerr => 1,
5887 );
5888
5889 # when migrating, prefix QEMU output so other side can pick up any
5890 # errors that might occur and show the user
5891 if ($migratedfrom) {
5892 $run_params{quiet} = 1;
5893 $run_params{logfunc} = sub { print "QEMU: $_[0]\n" };
5894 }
5895
5896 my %systemd_properties = (
5897 Slice => 'qemu.slice',
5898 KillMode => 'process',
5899 SendSIGKILL => 0,
5900 TimeoutStopUSec => ULONG_MAX, # infinity
5901 );
5902
5903 if (PVE::CGroup::cgroup_mode() == 2) {
5904 $systemd_properties{CPUWeight} = $cpuunits;
5905 } else {
5906 $systemd_properties{CPUShares} = $cpuunits;
5907 }
5908
5909 if (my $cpulimit = $conf->{cpulimit}) {
5910 $systemd_properties{CPUQuota} = int($cpulimit * 100);
5911 }
5912 $systemd_properties{timeout} = 10 if $statefile; # setting up the scope shoul be quick
5913
5914 my $run_qemu = sub {
5915 PVE::Tools::run_fork sub {
5916 PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);
5917
5918 my $tpmpid;
5919 if (my $tpm = $conf->{tpmstate0}) {
5920 # start the TPM emulator so QEMU can connect on start
5921 $tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom);
5922 }
5923
5924 my $exitcode = run_command($cmd, %run_params);
5925 if ($exitcode) {
5926 if ($tpmpid) {
5927 warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n";
5928 kill 'TERM', $tpmpid;
5929 }
5930 die "QEMU exited with code $exitcode\n";
5931 }
5932 };
5933 };
5934
5935 if ($conf->{hugepages}) {
5936
5937 my $code = sub {
5938 my $hotplug_features =
5939 parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
5940 my $hugepages_topology =
5941 PVE::QemuServer::Memory::hugepages_topology($conf, $hotplug_features->{memory});
5942
5943 my $hugepages_host_topology = PVE::QemuServer::Memory::hugepages_host_topology();
5944
5945 PVE::QemuServer::Memory::hugepages_mount();
5946 PVE::QemuServer::Memory::hugepages_allocate($hugepages_topology, $hugepages_host_topology);
5947
5948 eval { $run_qemu->() };
5949 if (my $err = $@) {
5950 PVE::QemuServer::Memory::hugepages_reset($hugepages_host_topology)
5951 if !$conf->{keephugepages};
5952 die $err;
5953 }
5954
5955 PVE::QemuServer::Memory::hugepages_pre_deallocate($hugepages_topology)
5956 if !$conf->{keephugepages};
5957 };
5958 eval { PVE::QemuServer::Memory::hugepages_update_locked($code); };
5959
5960 } else {
5961 eval { $run_qemu->() };
5962 }
5963
5964 if (my $err = $@) {
5965 # deactivate volumes if start fails
5966 eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
5967 warn $@ if $@;
5968 eval { cleanup_pci_devices($vmid, $conf) };
5969 warn $@ if $@;
5970
5971 die "start failed: $err";
5972 }
5973
5974 # re-reserve all PCI IDs now that we can know the actual VM PID
5975 my $pid = PVE::QemuServer::Helpers::vm_running_locally($vmid);
5976 eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_reserve_list, $vmid, undef, $pid) };
5977 warn $@ if $@;
5978
5979 if (defined($res->{migrate})) {
5980 print "migration listens on $res->{migrate}->{uri}\n";
5981 } elsif ($statefile) {
5982 eval { mon_cmd($vmid, "cont"); };
5983 warn $@ if $@;
5984 }
5985
5986 #start nbd server for storage migration
5987 if (my $nbd = $migrate_opts->{nbd}) {
5988 my $nbd_protocol_version = $migrate_opts->{nbd_proto_version} // 0;
5989
5990 my $migrate_storage_uri;
5991 # nbd_protocol_version > 0 for unix socket support
5992 if ($nbd_protocol_version > 0 && ($migration_type eq 'secure' || $migration_type eq 'websocket')) {
5993 my $socket_path = "/run/qemu-server/$vmid\_nbd.migrate";
5994 mon_cmd($vmid, "nbd-server-start", addr => { type => 'unix', data => { path => $socket_path } } );
5995 $migrate_storage_uri = "nbd:unix:$socket_path";
5996 $res->{migrate}->{unix_sockets} = [$socket_path];
5997 } else {
5998 my $nodename = nodename();
5999 my $localip = $get_migration_ip->($nodename);
6000 my $pfamily = PVE::Tools::get_host_address_family($nodename);
6001 my $storage_migrate_port = PVE::Tools::next_migrate_port($pfamily);
6002
6003 mon_cmd($vmid, "nbd-server-start", addr => {
6004 type => 'inet',
6005 data => {
6006 host => "${localip}",
6007 port => "${storage_migrate_port}",
6008 },
6009 });
6010 $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
6011 $migrate_storage_uri = "nbd:${localip}:${storage_migrate_port}";
6012 }
6013
6014 my $block_info = mon_cmd($vmid, "query-block");
6015 $block_info = { map { $_->{device} => $_ } $block_info->@* };
6016
6017 foreach my $opt (sort keys %$nbd) {
6018 my $drivestr = $nbd->{$opt}->{drivestr};
6019 my $volid = $nbd->{$opt}->{volid};
6020
6021 my $block_node = $block_info->{"drive-$opt"}->{inserted}->{'node-name'};
6022
6023 mon_cmd(
6024 $vmid,
6025 "block-export-add",
6026 id => "drive-$opt",
6027 'node-name' => $block_node,
6028 writable => JSON::true,
6029 type => "nbd",
6030 name => "drive-$opt", # NBD export name
6031 );
6032
6033 my $nbd_uri = "$migrate_storage_uri:exportname=drive-$opt";
6034 print "storage migration listens on $nbd_uri volume:$drivestr\n";
6035 print "re-using replicated volume: $opt - $volid\n"
6036 if $nbd->{$opt}->{replicated};
6037
6038 $res->{drives}->{$opt} = $nbd->{$opt};
6039 $res->{drives}->{$opt}->{nbd_uri} = $nbd_uri;
6040 }
6041 }
6042
6043 if ($migratedfrom) {
6044 eval {
6045 set_migration_caps($vmid);
6046 };
6047 warn $@ if $@;
6048
6049 if ($spice_port) {
6050 print "spice listens on port $spice_port\n";
6051 $res->{spice_port} = $spice_port;
6052 if ($migrate_opts->{spice_ticket}) {
6053 mon_cmd($vmid, "set_password", protocol => 'spice', password =>
6054 $migrate_opts->{spice_ticket});
6055 mon_cmd($vmid, "expire_password", protocol => 'spice', time => "+30");
6056 }
6057 }
6058
6059 } else {
6060 mon_cmd($vmid, "balloon", value => $conf->{balloon}*1024*1024)
6061 if !$statefile && $conf->{balloon};
6062
6063 foreach my $opt (keys %$conf) {
6064 next if $opt !~ m/^net\d+$/;
6065 my $nicconf = parse_net($conf->{$opt});
6066 qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down};
6067 }
6068 add_nets_bridge_fdb($conf, $vmid);
6069 }
6070
6071 if (!defined($conf->{balloon}) || $conf->{balloon}) {
6072 eval {
6073 mon_cmd(
6074 $vmid,
6075 'qom-set',
6076 path => "machine/peripheral/balloon0",
6077 property => "guest-stats-polling-interval",
6078 value => 2
6079 );
6080 };
6081 log_warn("could not set polling interval for ballooning - $@") if $@;
6082 }
6083
6084 if ($resume) {
6085 print "Resumed VM, removing state\n";
6086 if (my $vmstate = $conf->{vmstate}) {
6087 PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
6088 PVE::Storage::vdisk_free($storecfg, $vmstate);
6089 }
6090 delete $conf->@{qw(lock vmstate runningmachine runningcpu)};
6091 PVE::QemuConfig->write_config($vmid, $conf);
6092 }
6093
6094 PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-start');
6095
6096 return $res;
6097 }
6098
# Build the QEMU command for a VM and return it rendered as a single string.
#
# $storecfg: storage configuration, passed on to config_to_command()
# $vmid:     ID of the VM whose configuration gets loaded
# $snapname: optional; when set, the configuration of that snapshot is used instead of the
#            current one. Dies if no such snapshot exists.
sub vm_commandline {
    my ($storecfg, $vmid, $snapname) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    my $forcemachine;
    my $forcecpu;

    if ($snapname) {
        my $snap_conf = $conf->{snapshots}->{$snapname};
        if (!defined($snap_conf)) {
            die "snapshot '$snapname' does not exist\n";
        }

        # a snapshot may carry machine or CPU overrides
        ($forcemachine, $forcecpu) = $snap_conf->@{qw(runningmachine runningcpu)};

        # keep the digest of the config file for the API
        $snap_conf->{digest} = $conf->{digest};

        $conf = $snap_conf;
    }

    my $defaults = load_defaults();
    my $cmd = config_to_command(
        $storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu);

    return PVE::Tools::cmd2string($cmd);
}
6124
# Send a 'system_reset' to the VM's monitor while holding the config lock.
#
# $vmid:     ID of the VM to reset
# $skiplock: when true, the lock entry in the VM configuration is not checked
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $reset_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid);

        if (!$skiplock) {
            PVE::QemuConfig->check_lock($conf);
        }

        mon_cmd($vmid, "system_reset");
    };

    PVE::QemuConfig->lock_config($vmid, $reset_locked);
}
6137
# Collect the volume IDs referenced by a VM configuration.
#
# $conf: VM configuration, walked with foreach_volid()
#
# Returns an array reference with every volid that is not an absolute path and for which
# PVE::Storage::parse_volume_id() yields a storage ID.
sub get_vm_volumes {
    my ($conf) = @_;

    my @volids;

    my $collect = sub {
        my ($volid) = @_;

        # absolute paths are skipped
        return if $volid =~ m|^/|;

        # second argument set so that unparsable IDs are skipped below rather than handled here
        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        push @volids, $volid;
    };

    foreach_volid($conf, $collect);

    return \@volids;
}
6155
# Remove the mediated devices configured for a VM and drop its PCI reservation.
#
# $vmid: ID of the VM
# $conf: VM configuration; only its 'hostpciN' entries are looked at
sub cleanup_pci_devices {
    my ($vmid, $conf) = @_;

    for my $opt (keys $conf->%*) {
        next unless $opt =~ m/^hostpci(\d+)$/;
        my $index = $1;

        my $mdev_uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $index);
        my $device = parse_hostpci($conf->{$opt});
        next if !$device->{mdev};

        # NOTE: PVE::SysFSTools::pci_cleanup_mdev_device is avoided on purpose, as it requires
        # the PCI ID and breaking ABI just for this two-liner is not worth it
        my $sysfs_dir = "/sys/bus/mdev/devices/$mdev_uuid";

        # some nvidia vgpu driver versions want to clean up the mdevs themselves and error out
        # if we are faster, so give them 10 seconds before trying
        sleep 10 if $device->{ids}->[0]->[0]->{vendor} =~ m/^(0x)?10de$/;

        if (-e $sysfs_dir) {
            PVE::SysFSTools::file_write("$sysfs_dir/remove", "1");
        }
    }

    PVE::QemuServer::PCI::remove_pci_reservation($vmid);
}
6180
# Clean up after a VM has stopped. Errors are never propagated, only printed as warnings.
#
# $storecfg:              storage configuration
# $vmid:                  ID of the VM
# $conf:                  VM configuration
# $keepActive:            when true, volumes are neither deactivated nor unmapped
# $apply_pending_changes: when true, vmconfig_apply_pending() is called at the end
sub vm_stop_cleanup {
    my ($storecfg, $vmid, $conf, $keepActive, $apply_pending_changes) = @_;

    eval {
        unless ($keepActive) {
            my $volids = get_vm_volumes($conf);
            PVE::Storage::deactivate_volumes($storecfg, $volids);

            my $tpmdrive = $conf->{tpmstate0};
            if ($tpmdrive) {
                my $tpm = parse_drive("tpmstate0", $tpmdrive);
                my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
                PVE::Storage::unmap_volume($storecfg, $tpm->{file}) if $storeid;
            }
        }

        # remove the per-VM files below /var/run/qemu-server
        for my $suffix (qw(mon qmp pid vnc qga)) {
            unlink "/var/run/qemu-server/${vmid}.$suffix";
        }

        if ($conf->{ivshmem}) {
            my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
            my $shm_name = $ivshmem->{name} // $vmid;
            # Just delete it for now. VMs which already have it open are not affected, but new
            # VMs will get a separate one. If this becomes an issue, either add some sort of
            # ref-counting or a "don't delete on stop" flag to the ivshmem format.
            unlink '/dev/shm/pve-shm-' . $shm_name;
        }

        cleanup_pci_devices($vmid, $conf);

        if ($apply_pending_changes) {
            vmconfig_apply_pending($vmid, $conf, $storecfg);
        }
    };
    warn $@ if $@; # avoid errors - just warn
}
6218
6219 # call only in locked context
# Stop or shut down a running VM. Does nothing if the VM is not running.
#
# $storecfg:   storage configuration, used for the cleanup afterwards
# $vmid:       ID of the VM
# $skiplock:   when true, the lock entry in the VM configuration is not checked
# $nocheck:    when true, the configuration is not loaded at all (so no lock check, no
#              'pre-stop' hookscript and no cleanup happen here)
# $timeout:    seconds to wait for the VM to go away; if undefined, the 'down' value of the
#              startup order is used on shutdown, with 60 as final fallback
# $shutdown:   when true, request a guest shutdown instead of quitting QEMU
# $force:      when true, escalate to SIGTERM (and later SIGKILL) instead of dying
# $keepActive: passed on to vm_stop_cleanup()
sub _do_vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive) = @_;

    my $pid = check_running($vmid, $nocheck);
    return if !$pid;

    my $conf;
    unless ($nocheck) {
        $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->check_lock($conf) if !$skiplock;

        if (!defined($timeout) && $shutdown && $conf->{startup}) {
            my $startup = PVE::JSONSchema::pve_parse_startup_order($conf->{startup});
            $timeout = $startup->{down} if $startup->{down};
        }

        PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-stop');
    }

    # Polls check_running() once per second for at most $secs seconds. Returns true if the
    # full time was used up.
    my $wait_timed_out = sub {
        my ($secs) = @_;

        my $elapsed = 0;
        while (($elapsed < $secs) && check_running($vmid, $nocheck)) {
            $elapsed++;
            sleep 1;
        }

        return $elapsed >= $secs;
    };

    # the cleanup needs the configuration, so it is skipped if none was loaded
    my $cleanup = sub {
        vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
    };

    eval {
        if (!$shutdown) {
            mon_cmd($vmid, "quit");
        } elsif (defined($conf) && get_qga_key($conf, 'enabled')) {
            mon_cmd($vmid, "guest-shutdown", timeout => $timeout);
        } else {
            mon_cmd($vmid, "system_powerdown");
        }
    };
    my $err = $@;

    if ($err) {
        if (!check_running($vmid, $nocheck)) {
            warn "Unexpected: VM shutdown command failed, but VM not running anymore..\n";
            return;
        }

        die "VM quit/powerdown failed\n" if !$force;

        warn "VM quit/powerdown failed - terminating now with SIGTERM\n";
        kill 15, $pid;
    } else {
        $timeout //= 60;

        if (!$wait_timed_out->($timeout)) {
            $cleanup->();
            return;
        }

        die "VM quit/powerdown failed - got timeout\n" if !$force;

        warn "VM still running - terminating now with SIGTERM\n";
        kill 15, $pid;
    }

    # SIGTERM was sent, wait again for a short time before using SIGKILL
    if ($wait_timed_out->(10)) {
        warn "VM still running - terminating now with SIGKILL\n";
        kill 9, $pid;
        sleep 1;
    }

    $cleanup->();
}
6300
6301 # Note: use $nocheck to skip tests if VM configuration file exists.
# We need that when migrating VMs to other nodes (files already moved)
6303 # Note: we set $keepActive in vzdump stop mode - volumes need to stay active
# Stop a VM.
#
# With $migratedfrom set, the process is sent SIGTERM (if running) and cleaned up using the
# configuration loaded for that node, without taking the config lock. Otherwise the work is
# done by _do_vm_stop() under the config lock.
sub vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive, $migratedfrom) = @_;

    # a plain stop defaults to being forceful
    $force //= 1 if !$shutdown;

    if (!$migratedfrom) {
        return PVE::QemuConfig->lock_config($vmid, sub {
            _do_vm_stop($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive);
        });
    }

    my $pid = check_running($vmid, $nocheck, $migratedfrom);
    if ($pid) {
        kill 15, $pid;
    }

    my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);
    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 0);

    return;
}
6321
# Reboot a VM by registering a reboot request and then shutting the VM down.
#
# $vmid:    ID of the VM
# $timeout: shutdown timeout, passed on to _do_vm_stop()
sub vm_reboot {
    my ($vmid, $timeout) = @_;

    my $reboot_locked = sub {
        eval {
            # only reboot if running, as qmeventd starts it again on a stop event
            if (check_running($vmid)) {
                create_reboot_request($vmid);

                my $storecfg = PVE::Storage::config();
                _do_vm_stop($storecfg, $vmid, undef, undef, $timeout, 1);
            }
        };
        if (my $failure = $@) {
            # avoid that the next normal shutdown will be confused for a reboot
            clear_reboot_request($vmid);
            die $failure;
        }
    };

    PVE::QemuConfig->lock_config($vmid, $reboot_locked);
}
6344
6345 # note: if using the statestorage parameter, the caller has to check privileges
# Suspend a VM.
#
# Without $includestate, the VM is only paused via the monitor 'stop' command. With
# $includestate, the VM state is saved to a newly allocated volume, QEMU is quit and the
# config lock entry is changed from 'suspending' to 'suspended'.
#
# $vmid:         ID of the VM
# $skiplock:     when true, the lock entry in the VM configuration is not checked
# $includestate: when true, suspend to disk
# $statestorage: storage for the state volume; if not set, one is chosen with
#                find_vmstate_storage() and permission-checked for non-CLI callers
sub vm_suspend {
    my ($vmid, $skiplock, $includestate, $statestorage) = @_;

    # set inside the first locked section and used after it
    my ($conf, $path, $storecfg, $vmstate);

    my $prepare_locked = sub {
        $conf = PVE::QemuConfig->load_config($vmid);

        my $is_backing_up = PVE::QemuConfig->has_lock($conf, 'backup');
        if (!$skiplock && !$is_backing_up) {
            PVE::QemuConfig->check_lock($conf);
        }

        if ($is_backing_up && $includestate) {
            die "cannot suspend to disk during backup\n";
        }

        if (!$includestate) {
            mon_cmd($vmid, "stop");
            return;
        }

        $conf->{lock} = 'suspending';
        my $date = strftime("%Y-%m-%d", localtime(time()));
        $storecfg = PVE::Storage::config();

        if (!$statestorage) {
            $statestorage = find_vmstate_storage($conf, $storecfg);

            # check permissions for the storage
            my $rpcenv = PVE::RPCEnvironment::get();
            if ($rpcenv->{type} ne 'cli') {
                my $authuser = $rpcenv->get_user();
                $rpcenv->check($authuser, "/storage/$statestorage", ['Datastore.AllocateSpace']);
            }
        }

        $vmstate = PVE::QemuConfig->__snapshot_save_vmstate(
            $vmid, $conf, "suspend-$date", $storecfg, $statestorage, 1);
        $path = PVE::Storage::path($storecfg, $vmstate);
        PVE::QemuConfig->write_config($vmid, $conf);
    };
    PVE::QemuConfig->lock_config($vmid, $prepare_locked);

    if ($includestate) {
        # save vm state
        PVE::Storage::activate_volumes($storecfg, [$vmstate]);

        eval {
            set_migration_caps($vmid, 1);
            mon_cmd($vmid, "savevm-start", statefile => $path);

            # poll once per second until the save is reported as completed (or something
            # other than 'active' shows up)
            while (1) {
                my $state = mon_cmd($vmid, "query-savevm");
                my $status = $state->{status};

                die "savevm not active\n" if !$status;

                if ($status eq 'active') {
                    sleep(1);
                    next;
                }

                if ($status eq 'completed') {
                    print "State saved, quitting\n";
                    last;
                }

                die "query-savevm failed with error '$state->{error}'\n"
                    if $status eq 'failed' && $state->{error};

                die "query-savevm returned status '$state->{status}'\n";
            }
        };
        my $err = $@;

        PVE::QemuConfig->lock_config($vmid, sub {
            $conf = PVE::QemuConfig->load_config($vmid);

            if ($err) {
                # cleanup, but leave suspending lock, to indicate something went wrong
                eval {
                    mon_cmd($vmid, "savevm-end");
                    PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
                    PVE::Storage::vdisk_free($storecfg, $vmstate);
                    delete $conf->@{qw(vmstate runningmachine runningcpu)};
                    PVE::QemuConfig->write_config($vmid, $conf);
                };
                warn $@ if $@;
                die $err;
            }

            if (!PVE::QemuConfig->has_lock($conf, 'suspending')) {
                die "lock changed unexpectedly\n";
            }

            mon_cmd($vmid, "quit");
            $conf->{lock} = 'suspended';
            PVE::QemuConfig->write_config($vmid, $conf);
        });
    }
}
6439
6440 # $nocheck is set when called as part of a migration - in this context the
6441 # location of the config file (source or target node) is not deterministic,
6442 # since migration cannot wait for pmxcfs to process the rename
# Resume a VM, picking the monitor command based on the status QEMU reports.
#
# $vmid:     ID of the VM
# $skiplock: when true, the lock entry in the VM configuration is not checked
# $nocheck:  when true, the configuration is searched for on this node, then on the node
#            listed in the cluster VM list, and no lock check is done
sub vm_resume {
    my ($vmid, $skiplock, $nocheck) = @_;

    PVE::QemuConfig->lock_config($vmid, sub {
        my $res = mon_cmd($vmid, 'query-status');

        my $conf;
        if (!$nocheck) {
            $conf = PVE::QemuConfig->load_config($vmid);
        } else {
            $conf = eval { PVE::QemuConfig->load_config($vmid) }; # try on target node
            if ($@) {
                my $vmlist = PVE::Cluster::get_vmlist();
                my $entry_exists = exists($vmlist->{ids}->{$vmid});
                if ($entry_exists) {
                    my $node = $vmlist->{ids}->{$vmid}->{node};
                    $conf = eval { PVE::QemuConfig->load_config($vmid, $node) }; # try on source node
                }
                if (!$conf) {
                    PVE::Cluster::cfs_update(); # vmlist was wrong, invalidate cache
                    $conf = PVE::QemuConfig->load_config($vmid); # last try on target node again
                }
            }
        }

        my $resume_cmd = 'cont';
        my $reset = 0;

        my $status = $res->{status};
        if ($status) {
            return if $status eq 'running'; # job done, go home

            if ($status eq 'suspended') {
                $resume_cmd = 'system_wakeup';
            }
            if ($status eq 'shutdown') {
                $reset = 1;
            }
        }

        # a 'backup' lock does not prevent resuming
        if (!$nocheck && !$skiplock && !PVE::QemuConfig->has_lock($conf, 'backup')) {
            PVE::QemuConfig->check_lock($conf);
        }

        # required if a VM shuts down during a backup and we get a resume
        # request before the backup finishes for example
        mon_cmd($vmid, "system_reset") if $reset;

        if ($resume_cmd eq 'cont') {
            add_nets_bridge_fdb($conf, $vmid);
        }

        mon_cmd($vmid, $resume_cmd);
    });
}
6490
# Send a key to the VM through the human monitor, while holding the config lock.
#
# $vmid:     ID of the VM
# $skiplock: accepted, but not used by this function
# $key:      key name, inserted verbatim into the 'sendkey' monitor command
#
# Dies with the monitor output if that output is not empty.
sub vm_sendkey {
    my ($vmid, $skiplock, $key) = @_;

    my $sendkey_locked = sub {
        # the loaded configuration itself is not used any further here
        my $conf = PVE::QemuConfig->load_config($vmid);

        # there is no qmp command, so we use the human monitor command
        my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "sendkey $key");
        die $output if $output ne '';
    };

    PVE::QemuConfig->lock_config($vmid, $sendkey_locked);
}
6503
# Check the vnet access of a user for every 'netN' entry of a configuration.
#
# $rpcenv:   RPC environment, passed on to PVE::GuestHelpers::check_vnet_access()
# $authuser: user to check; 'root@pam' skips all checks
# $conf:     VM configuration
#
# Returns 1 if it returns at all.
sub check_bridge_access {
    my ($rpcenv, $authuser, $conf) = @_;

    return 1 if $authuser eq 'root@pam';

    my @net_opts = grep { m/^net\d+$/ } sort keys $conf->%*;

    for my $opt (@net_opts) {
        my $net = parse_net($conf->{$opt});
        PVE::GuestHelpers::check_vnet_access(
            $rpcenv, $authuser, $net->@{'bridge', 'tag', 'trunks'});
    }

    return 1;
}
6517
# Check that a user may use the USB and PCI passthrough entries of a configuration.
#
# $rpcenv: RPC environment used for the 'Mapping.Use' permission checks
# $user:   user to check
# $conf:   VM configuration; only 'usbN' and 'hostpciN' entries are looked at
#
# Dies for host devices set by anyone but 'root@pam' (a USB host value of 'spice' is exempt)
# and for entries with neither 'host' nor 'mapping'.
sub check_mapping_access {
    my ($rpcenv, $user, $conf) = @_;

    for my $opt (keys $conf->%*) {
        if ($opt =~ m/^usb\d+$/) {
            my $usb = PVE::JSONSchema::parse_property_string('pve-qm-usb', $conf->{$opt});
            my $host = $usb->{host};

            if ($host) {
                if ($host !~ m/^spice$/i && $user ne 'root@pam') {
                    die "only root can set '$opt' config for real devices\n";
                }
            } elsif ($usb->{mapping}) {
                $rpcenv->check_full($user, "/mapping/usb/$usb->{mapping}", ['Mapping.Use']);
            } else {
                die "either 'host' or 'mapping' must be set.\n";
            }
        } elsif ($opt =~ m/^hostpci\d+$/) {
            my $pci = PVE::JSONSchema::parse_property_string('pve-qm-hostpci', $conf->{$opt});

            if ($pci->{host}) {
                if ($user ne 'root@pam') {
                    die "only root can set '$opt' config for non-mapped devices\n";
                }
            } elsif ($pci->{mapping}) {
                $rpcenv->check_full($user, "/mapping/pci/$pci->{mapping}", ['Mapping.Use']);
            } else {
                die "either 'host' or 'mapping' must be set.\n";
            }
        }
    }
}
6544
# FIXME: improve checks on restore by checking before actually extracting and
6546 # merging the new config
# Run the bridge and the device mapping access checks for a configuration that gets restored.
#
# $rpcenv: RPC environment
# $user:   user to check
# $conf:   VM configuration
sub check_restore_permissions {
    my ($rpcenv, $user, $conf) = @_;

    my @check_args = ($rpcenv, $user, $conf);

    check_bridge_access(@check_args);

    return check_mapping_access(@check_args);
}
# vzdump restore implementation
6553
# Return the name of the first entry of a tar archive, as listed by 'tar tf'.
#
# $archive: path of the archive; must be an existing plain file
#
# Dies if the file does not exist, tar cannot be started or the listing is empty.
sub tar_archive_read_firstfile {
    my ($archive) = @_;

    if (!-f $archive) {
        die "ERROR: file '$archive' does not exist\n";
    }

    # try to detect archive type first
    my $pid = open(my $listing, '-|', 'tar', 'tf', $archive)
        or die "unable to open file '$archive'\n";

    my $first_entry = <$listing>;

    # only the first line is of interest, so tar gets terminated right away
    kill 15, $pid;
    close $listing;

    if (!$first_entry) {
        die "ERROR: archive contaions no data\n";
    }

    chomp $first_entry;

    return $first_entry;
}
6571
# Remove the temporary volumes listed in the stat file of a tar restore.
#
# $storecfg: storage configuration, used to free storage-managed volumes
# $statfile: path of the stat file; every line is expected to end in 'vzdump:<x>:<volid>'.
#            If the file cannot be opened, nothing is cleaned up.
#
# Volumes given as absolute paths are unlinked, everything else is freed through
# PVE::Storage::vdisk_free(). Failures are reported on STDERR and never propagated.
sub tar_restore_cleanup {
    my ($storecfg, $statfile) = @_;

    print STDERR "starting cleanup\n";

    if (my $fd = IO::File->new($statfile, "r")) {
        while (defined(my $line = <$fd>)) {
            if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                my $volid = $2;
                eval {
                    if ($volid =~ m|^/|) {
                        # Parentheses and 'or' are required here: 'unlink $volid || die ...'
                        # parses as 'unlink($volid || die ...)' and would never notice a
                        # failed unlink.
                        unlink($volid) or die "unlink failed - $!\n";
                    } else {
                        PVE::Storage::vdisk_free($storecfg, $volid);
                    }
                    print STDERR "temporary volume '$volid' sucessfuly removed\n";
                };
                print STDERR "unable to cleanup '$volid' - $@" if $@;
            } else {
                print STDERR "unable to parse line in statfile - $line";
            }
        }
        $fd->close();
    }
}
6597
# Restore a VM from a backup archive, dispatching on the archive format.
#
# $archive: path of the archive, or '-' (handed to the VMA restore without looking at it)
# $vmid:    ID of the VM to restore to
# $user:    user on whose behalf the restore runs
# $opts:    restore options; 'format' overrides the format reported for the archive
sub restore_file_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if ($archive eq '-') {
        return restore_vma_archive($archive, $vmid, $user, $opts);
    }

    my $info = PVE::Storage::archive_info($archive);
    my $format = $opts->{format} // $info->{format};
    my $compression = $info->{compression};

    # everything that is not a tar archive is treated as VMA
    return restore_tar_archive($archive, $vmid, $user, $opts) if $format eq 'tar';

    return restore_vma_archive($archive, $vmid, $user, $opts, $compression);
}
6615
# helper to remove disks that will not be used after restore
# $storecfg:     Storage configuration
# $vmid:         ID of the VM that gets restored
# $oldconf:      configuration the VM had before the restore
# $virtdev_hash: drives contained in the backup, keyed by drive name
#
# Frees the old volumes of all drives that get restored, plus the state volumes of all old
# snapshots. For volumes that are kept, the old snapshots are deleted on the storage.
my $restore_cleanup_oldconf = sub {
    my ($storecfg, $vmid, $oldconf, $virtdev_hash) = @_;

    my %kept_volids;

    my $cleanup_drive = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;

        # only touch volumes that are owned by this VM
        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        # Note: only delete disk we want to restore
        # other volumes will become unused
        if (!$virtdev_hash->{$ds}) {
            $kept_volids{$volid} = 1;
            return;
        }

        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        warn $@ if $@;
    };

    PVE::QemuConfig->foreach_volume($oldconf, $cleanup_drive);

    # after the restore we have no snapshots anymore
    for my $snapname (keys $oldconf->{snapshots}->%*) {
        my $vmstate = $oldconf->{snapshots}->{$snapname}->{vmstate};
        if ($vmstate) {
            eval { PVE::Storage::vdisk_free($storecfg, $vmstate); };
            warn $@ if $@;
        }

        for my $volid (keys %kept_volids) {
            eval { PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snapname); };
            warn $@ if $@;
        }
    }
};
6661
6662 # Helper to parse vzdump backup device hints
6663 #
# $rpcenv: Environment, used to check storage permissions
6665 # $user: User ID, to check storage permissions
6666 # $storecfg: Storage configuration
6667 # $fh: the file handle for reading the configuration
# $devinfo: should contain device sizes for all backed-up devices
6669 # $options: backup options (pool, default storage)
6670 #
6671 # Return: $virtdev_hash, updates $devinfo (add devname, virtdev, format, storeid)
my $parse_backup_hints = sub {
    my ($rpcenv, $user, $storecfg, $fh, $devinfo, $options) = @_;

    # dies unless images can be allocated on the storage by $user ('root@pam' is not checked)
    my $assert_storage_usable = sub {
        my ($storeid, $scfg) = @_;

        if (!$scfg->{content}->{images}) {
            die "Content type 'images' is not available on storage '$storeid'\n";
        }

        if ($user ne 'root@pam') {
            $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace']);
        }
    };

    my $virtdev_hash = {};

    while (defined(my $line = <$fh>)) {
        # device map hint written into the backup configuration
        if ($line =~ m/^\#qmdump\#map:(\S+):(\S+):(\S*):(\S*):$/) {
            my ($virtdev, $devname, $storeid, $format) = ($1, $2, $3, $4);

            my $info = $devinfo->{$devname};
            die "archive does not contain data for drive '$virtdev'\n" if !$info;

            # an explicitly requested storage wins, 'local' is the last fallback
            $storeid = $options->{storage} if defined($options->{storage});
            $storeid ||= 'local';
            $format ||= 'raw';

            $info->@{qw(devname virtdev format storeid)} =
                ($devname, $virtdev, $format, $storeid);

            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $assert_storage_usable->($storeid, $scfg); # permission and content type check

            $virtdev_hash->{$virtdev} = $info;
            next;
        }

        # regular drive lines are only of interest if they refer to a cloudinit disk
        next unless $line =~ m/^((?:ide|sata|scsi)\d+):\s*(.*)\s*$/;

        my $virtdev = $1;
        my $drive = parse_drive($virtdev, $2);
        next if !drive_is_cloudinit($drive);

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        $storeid = $options->{storage} if defined ($options->{storage});

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        my $format = qemu_img_format($scfg, $volname); # has 'raw' fallback

        $assert_storage_usable->($storeid, $scfg); # permission and content type check

        $virtdev_hash->{$virtdev} = {
            format => $format,
            storeid => $storeid,
            size => PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE,
            is_cloudinit => 1,
        };
    }

    return $virtdev_hash;
};
6729
6730 # Helper to allocate and activate all volumes required for a restore
6731 #
6732 # $storecfg: Storage configuration
6733 # $virtdev_hash: as returned by parse_backup_hints()
6734 #
6735 # Returns: { $virtdev => $volid }
# $storecfg:     Storage configuration
# $virtdev_hash: drives to allocate; each entry needs 'size', 'storeid' and 'format', and gets
#                its 'volid' (and possibly a changed 'format') written back
# $vmid:         ID of the VM the volumes are allocated for
#
# Allocates and activates one volume per drive, in sorted drive name order. Dies as soon as
# one of the storage calls fails; volumes allocated up to then are recorded in $virtdev_hash.
my $restore_allocate_devices = sub {
    my ($storecfg, $virtdev_hash, $vmid) = @_;

    my $map = {};
    foreach my $virtdev (sort keys %$virtdev_hash) {
        my $d = $virtdev_hash->{$virtdev};
        # 'size' is divided by 1024, rounding up, to get the size handed to vdisk_alloc()
        my $alloc_size = int(($d->{size} + 1024 - 1)/1024);
        my $storeid = $d->{storeid};
        # looked up once per drive and reused below for the cloudinit naming
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        # test if requested format is supported
        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        my $supported = grep { $_ eq $d->{format} } @$validFormats;
        $d->{format} = $defFormat if !$supported;

        # only cloudinit disks get an explicit name, all others are named by the storage layer
        my $name;
        if ($d->{is_cloudinit}) {
            $name = "vm-$vmid-cloudinit";
            # storages with a 'path' get the format appended as file extension
            $name .= ".$d->{format}" if $scfg->{path};
        }

        my $volid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $vmid, $d->{format}, $name, $alloc_size);

        print STDERR "new volume ID is '$volid'\n";
        $d->{volid} = $volid;

        PVE::Storage::activate_volumes($storecfg, [$volid]);

        $map->{$virtdev} = $volid;
    }

    return $map;
};
6773
# Translate one line of a backed-up VM configuration into the text to write to the restored
# configuration.
#
# $cookie: hash reference with state kept across calls; 'netcount' is the index used for (and
#          incremented by) network devices converted from old 'vlanN' lines
# $map:    maps drive names to the volume IDs allocated for the restore
# $line:   the configuration line
# $unique: when true, MAC addresses and the smbios1 UUID get regenerated
#
# Returns the replacement text, which is empty for lines that get dropped.
sub restore_update_config_line {
    my ($cookie, $map, $line, $unique) = @_;

    # backup hints, locks, unused volumes and snapshot parents are dropped
    return '' if $line =~ m/^\#qmdump\#/
        || $line =~ m/^\#vzdump\#/
        || $line =~ m/^lock:/
        || $line =~ m/^unused\d+:/
        || $line =~ m/^parent:/;

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');

    if ($line =~ m/^(vlan(\d+)):\s*(\S+)\s*$/) {
        # try to convert old 1.X settings
        my ($ind, $ethcfg) = ($2, $3);

        my $converted = '';
        for my $devconfig (PVE::Tools::split_list($ethcfg)) {
            my ($model, $macaddr) = split(/\=/, $devconfig);
            if (!$macaddr || $unique) {
                $macaddr = PVE::Tools::random_ether_addr($dc->{mac_prefix});
            }

            my $netstr = print_net({
                model => $model,
                bridge => "vmbr$ind",
                macaddr => $macaddr,
            });

            $converted .= "net$cookie->{netcount}: $netstr\n";
            $cookie->{netcount}++;
        }

        return $converted;
    }

    if (($line =~ m/^(net\d+):\s*(\S+)\s*$/) && $unique) {
        my ($id, $netstr) = ($1, $2);

        my $net = parse_net($netstr);
        # only an already set MAC address gets replaced
        if ($net->{macaddr}) {
            $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix});
        }
        $netstr = print_net($net);

        return "$id: $netstr\n";
    }

    if ($line =~ m/^((ide|scsi|virtio|sata|efidisk|tpmstate)\d+):\s*(\S+)\s*$/) {
        my ($virtdev, $value) = ($1, $3);
        my $di = parse_drive($virtdev, $value);

        # drives explicitly excluded from backup are kept as a comment only
        return "#$line" if defined($di->{backup}) && !$di->{backup};

        # drives that were not restored stay as they are
        return $line if !$map->{$virtdev};

        delete $di->{format}; # format can change on restore
        $di->{file} = $map->{$virtdev};
        $value = print_drive($di);

        return "$virtdev: $value\n";
    }

    if ($line =~ m/^vmgenid: (.*)/) {
        my $vmgenid = $1;
        # always generate a new vmgenid if there was a valid one setup
        $vmgenid = generate_uuid() if $vmgenid ne '0';

        return "vmgenid: $vmgenid\n";
    }

    if (($line =~ m/^(smbios1: )(.*)/) && $unique) {
        my ($prefix, $smbios_raw) = ($1, $2);

        my ($uuid, $uuid_str);
        UUID::generate($uuid);
        UUID::unparse($uuid, $uuid_str);

        my $smbios1 = parse_smbios1($smbios_raw);
        $smbios1->{uuid} = $uuid_str;

        return $prefix . print_smbios1($smbios1) . "\n";
    }

    return '' . $line;
}
6842
# $storecfg:     Storage configuration
# $virtdev_hash: drives of the restore; entries without a 'volid' are ignored
#
# Deactivates all volumes of the restore. A failure is printed to STDERR, not propagated.
my $restore_deactivate_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    my @volids = grep { $_ } map { $_->{volid} } values $virtdev_hash->%*;

    eval { PVE::Storage::deactivate_volumes($storecfg, \@volids); };
    print STDERR $@ if $@;
};
6854
# $storecfg:     Storage configuration
# $virtdev_hash: drives of the restore; entries without a 'volid' are ignored
#
# Frees all volumes of the restore, one by one. The outcome for each volume is printed to
# STDERR and a failure does not stop the remaining volumes from being freed.
my $restore_destroy_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    for my $volid (map { $_->{volid} } values $virtdev_hash->%*) {
        next if !$volid;

        eval {
            PVE::Storage::vdisk_free($storecfg, $volid);
            print STDERR "temporary volume '$volid' sucessfuly removed\n";
        };
        if ($@) {
            print STDERR "unable to cleanup '$volid' - $@";
        }
    }
};
6867
# $filename:        config file name, passed on to parse_vm_config()
# $backup_conf_raw: raw configuration text from the backup
# $override_conf:   hash reference whose entries replace those of the parsed configuration
#
# Returns the parsed backup configuration with all overrides applied on its top level.
my $restore_merge_config = sub {
    my ($filename, $backup_conf_raw, $override_conf) = @_;

    my $merged = parse_vm_config($filename, $backup_conf_raw);

    $merged->{$_} = $override_conf->{$_} for keys $override_conf->%*;

    return $merged;
};
6878
# List the image volumes of a VM across the storages.
#
# $cfg:  storage configuration
# $vmid: ID of the VM, passed to PVE::Storage::vdisk_list()
#
# Returns a hash reference mapping each volid to its list entry, extended by a 'path' key.
# Entries without a volid or with a false size are left out.
sub scan_volids {
    my ($cfg, $vmid) = @_;

    my $info = PVE::Storage::vdisk_list($cfg, undef, $vmid, undef, 'images');

    my %volid_hash;
    for my $storeid (keys $info->%*) {
        for my $item ($info->{$storeid}->@*) {
            next unless $item->{volid} && $item->{size};

            $item->{path} = PVE::Storage::path($cfg, $item->{volid});
            $volid_hash{ $item->{volid} } = $item;
        }
    }

    return \%volid_hash;
}
6895
# Bring the disk entries of $conf in line with the volumes actually found.
#
# $volid_hash maps volid => info hash (with 'size' and 'path'). Three passes:
#   1. refresh the size of every configured drive that has a matching volume,
#   2. drop 'unusedX' entries whose volume is referenced elsewhere,
#   3. add volumes nobody references as new unused entries.
# $conf is modified in place. Returns a true value if anything changed,
# undef otherwise.
sub update_disk_config {
    my ($vmid, $conf, $volid_hash) = @_;

    my $changes;
    my $prefix = "VM $vmid";

    # Storages may be defined several times with the same path (alias), so two
    # different volids can point to the same file. Track both the volid and the
    # real path of everything referenced by used and unused disks.
    my $seen_volid = {};
    my $seen_path = {};

    # pass 1: update size info
    my $sync_size = sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;
        my $volume = $volid_hash->{$volid};

        # remember the volume as "in-use" for the following passes
        $seen_volid->{$volid} = 1;
        if ($volume && (my $path = $volume->{path})) {
            $seen_path->{$path} = 1;
        }

        return if drive_is_cdrom($drive);
        return if !$volume;

        my ($updated, $msg) = PVE::QemuServer::Drive::update_disksize($drive, $volume->{size});
        if (defined($updated)) {
            $changes = 1;
            $conf->{$opt} = print_drive($updated);
            print "$prefix ($opt): $msg\n";
        }
    };
    PVE::QemuConfig->foreach_volume($conf, $sync_size);

    # pass 2: remove 'unusedX' entry if volume is used
    my $prune_unused = sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $path = $volid_hash->{$volid} ? $volid_hash->{$volid}->{path} : undef;
        if ($seen_volid->{$volid} || ($path && $seen_path->{$path})) {
            print "$prefix remove entry '$opt', its volume '$volid' is in use\n";
            $changes = 1;
            delete $conf->{$opt};
        }

        # also catches a second unused entry pointing at the same volume
        $seen_volid->{$volid} = 1;
        $seen_path->{$path} = 1 if $path;
    };
    PVE::QemuConfig->foreach_unused_volume($conf, $prune_unused);

    # pass 3: register volumes that are not referenced at all
    for my $volid (sort keys %$volid_hash) {
        next if $volid =~ m/vm-$vmid-state-/;
        next if $seen_volid->{$volid};

        my $path = $volid_hash->{$volid}->{path};
        next if !$path; # just to be sure
        next if $seen_path->{$path};

        $changes = 1;
        my $key = PVE::QemuConfig->add_unused_volume($conf, $volid);
        print "$prefix add unreferenced volume '$volid' as '$key' to config\n";
        $seen_path->{$path} = 1; # avoid to add more than once (aliases)
    }

    return $changes;
}
6969
# Rescan storage volumes and update the disk configuration of one VM (when
# $vmid is defined) or of every VM returned by config_list().
#
# $nolock - run the update without taking the per-VM config lock
# $dryrun - compute and print the changes, but do not write the config
sub rescan {
    my ($vmid, $nolock, $dryrun) = @_;

    my $cfg = PVE::Storage::config();

    print "rescan volumes...\n";
    my $volid_hash = scan_volids($cfg, $vmid);

    my $update_one = sub {
        my ($vmid) = @_;

        my $conf = PVE::QemuConfig->load_config($vmid);

        PVE::QemuConfig->check_lock($conf);

        # only hand over the volumes owned by this VM
        my $vm_volids = {};
        for my $volid (keys %$volid_hash) {
            my $info = $volid_hash->{$volid};
            next if !$info->{vmid} || $info->{vmid} != $vmid;
            $vm_volids->{$volid} = $info;
        }

        my $changes = update_disk_config($vmid, $conf, $vm_volids);

        PVE::QemuConfig->write_config($vmid, $conf) if $changes && !$dryrun;
    };

    # run the update for a single VM, with or without the config lock
    my $run = sub {
        my ($id) = @_;
        return $nolock
            ? $update_one->($id)
            : PVE::QemuConfig->lock_config($id, $update_one, $id);
    };

    if (defined($vmid)) {
        $run->($vmid);
    } else {
        for my $id (keys %{ config_list() }) {
            $run->($id);
        }
    }
}
7013
# Restore VM $vmid from a Proxmox Backup Server snapshot.
#
# $archive - volume ID of the backup; must parse as vtype 'backup' with
#            format 'pbs-vm'
# $vmid    - ID of the VM to create or overwrite
# $user    - user the restore runs as (passed to the hint parser and the
#            permission check)
# $options - hash; keys read here: 'live', 'unique', 'override_conf', 'pool'
#            (the whole hash is also passed to the backup hint parser)
#
# Disk images are written with `pbs-restore`. With $options->{live} only
# efidisk0 and tpmstate0 are written up front, the remaining disks are
# streamed by pbs_live_restore() while the VM is already running.
#
# Dies on error; volumes allocated up to that point are freed again.
sub restore_proxmox_backup_archive {
    my ($archive, $vmid, $user, $options) = @_;

    my $storecfg = PVE::Storage::config();

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($archive);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

    my $fingerprint = $scfg->{fingerprint};
    my $keyfile = PVE::Storage::PBSPlugin::pbs_encryption_key_file_name($storecfg, $storeid);

    my $repo = PVE::PBSClient::get_repository($scfg);
    my $namespace = $scfg->{namespace};

    # This is only used for `pbs-restore` and the QEMU PBS driver (live-restore)
    my $password = PVE::Storage::PBSPlugin::pbs_get_password($scfg, $storeid);
    local $ENV{PBS_PASSWORD} = $password;
    local $ENV{PBS_FINGERPRINT} = $fingerprint if defined($fingerprint);

    my ($vtype, $pbs_backup_name, undef, undef, undef, undef, $format) =
        PVE::Storage::parse_volname($storecfg, $archive);

    die "got unexpected vtype '$vtype'\n" if $vtype ne 'backup';

    die "got unexpected backup format '$format'\n" if $format ne 'pbs-vm';

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;
    mkpath $tmpdir;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    # Note: $oldconf is undef if VM does not exist
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my $rpcenv = PVE::RPCEnvironment::get();
    my $devinfo = {}; # info about drives included in backup
    my $virtdev_hash = {}; # info about allocated drives

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        my $cfgfn = "$tmpdir/qemu-server.conf";
        my $firewall_config_fn = "$tmpdir/fw.conf";
        my $index_fn = "$tmpdir/index.json";

        my $cmd = "restore";

        my $param = [$pbs_backup_name, "index.json", $index_fn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);
        my $index = PVE::Tools::file_get_contents($index_fn);
        $index = decode_json($index);

        foreach my $info (@{$index->{files}}) {
            # the dots are literal separators of the archive name, so escape them
            if ($info->{filename} =~ m/^(drive-\S+)\.img\.fidx$/) {
                my $devname = $1;
                if ($info->{size} =~ m/^(\d+)$/) { # untaint size
                    $devinfo->{$devname}->{size} = $1;
                } else {
                    die "unable to parse file size in 'index.json' - got '$info->{size}'\n";
                }
            }
        }

        my $is_qemu_server_backup = scalar(
            grep { $_->{filename} eq 'qemu-server.conf.blob' } @{$index->{files}}
        );
        if (!$is_qemu_server_backup) {
            die "backup does not look like a qemu-server backup (missing 'qemu-server.conf' file)\n";
        }
        my $has_firewall_config = scalar(grep { $_->{filename} eq 'fw.conf.blob' } @{$index->{files}});

        $param = [$pbs_backup_name, "qemu-server.conf", $cfgfn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

        if ($has_firewall_config) {
            $param = [$pbs_backup_name, "fw.conf", $firewall_config_fn];
            PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($firewall_config_fn, "${pve_firewall_dir}/$vmid.fw");
        }

        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $storecfg, $fh, $devinfo, $options);

        # fixme: rate limit?

        # create empty/temp config
        PVE::Tools::file_set_contents($conffile, "memory: 128\nlock: create");

        $restore_cleanup_oldconf->($storecfg, $vmid, $oldconf, $virtdev_hash) if $oldconf;

        # allocate volumes
        my $map = $restore_allocate_devices->($storecfg, $virtdev_hash, $vmid);

        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            # this fails if storage is unavailable
            my $volid = $d->{volid};
            my $path = PVE::Storage::path($storecfg, $volid);

            # for live-restore we only want to preload the efidisk and TPM state
            next if $options->{live} && $virtdev ne 'efidisk0' && $virtdev ne 'tpmstate0';

            my @ns_arg;
            if (defined(my $ns = $scfg->{namespace})) {
                @ns_arg = ('--ns', $ns);
            }

            my $pbs_restore_cmd = [
                '/usr/bin/pbs-restore',
                '--repository', $repo,
                @ns_arg,
                $pbs_backup_name,
                "$d->{devname}.img.fidx",
                $path,
                '--verbose',
            ];

            push @$pbs_restore_cmd, '--format', $d->{format} if $d->{format};
            push @$pbs_restore_cmd, '--keyfile', $keyfile if -e $keyfile;

            if (PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $volid)) {
                push @$pbs_restore_cmd, '--skip-zero';
            }

            my $dbg_cmdstring = PVE::Tools::cmd2string($pbs_restore_cmd);
            print "restore proxmox backup image: $dbg_cmdstring\n";
            run_command($pbs_restore_cmd);
        }

        # second pass over the backup config: rewrite it for the new VM
        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $options->{unique},
            );
        }

        $fh->close();
    };
    my $err = $@;

    # for a successful live-restore the volumes must stay active for the VM start
    if ($err || !$options->{live}) {
        $restore_deactivate_volumes->($storecfg, $virtdev_hash);
    }

    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($storecfg, $virtdev_hash);
        die $err;
    }

    if ($options->{live}) {
        # keep lock during live-restore
        $new_conf_raw .= "\nlock: create";
    }

    my $new_conf = $restore_merge_config->($conffile, $new_conf_raw, $options->{override_conf});
    check_restore_permissions($rpcenv, $user, $new_conf);
    PVE::QemuConfig->write_config($vmid, $new_conf);

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $options->{pool}) if $options->{pool};

    if ($options->{live}) {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "got signal ($!) - abort\n"; };

        my $conf = PVE::QemuConfig->load_config($vmid);
        die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf);

        # these special drives are already restored before start
        delete $devinfo->{'drive-efidisk0'};
        delete $devinfo->{'drive-tpmstate0-backup'};

        my $pbs_opts = {
            repo => $repo,
            keyfile => $keyfile,
            snapshot => $pbs_backup_name,
            namespace => $namespace,
        };
        pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $pbs_opts);

        PVE::QemuConfig->remove_lock($vmid, "create");
    }
}
7230
# Start VM $vmid paused on top of a PBS snapshot and stream the disk contents
# into the already allocated target volumes while the guest runs.
#
# $conf           - VM config, used to look up the target of each drive
# $storecfg       - storage configuration
# $restored_disks - hash whose keys are QEMU device names ('drive-<confkey>')
# $opts           - hash with 'repo', 'snapshot' and optionally 'keyfile' and
#                   'namespace'
#
# On any error the VM is stopped and the function dies with
# "live-restore failed".
sub pbs_live_restore {
    my ($vmid, $conf, $storecfg, $restored_disks, $opts) = @_;

    print "starting VM for live-restore\n";
    print "repository: '$opts->{repo}', snapshot: '$opts->{snapshot}'\n";

    my $pbs_backing = {};
    for my $ds (keys %$restored_disks) {
        # never continue with a stale $1 if the name has an unexpected shape
        my ($confname) = $ds =~ m/^drive-(.*)$/
            or die "unexpected device name '$ds' - cannot map it to a drive\n";

        $pbs_backing->{$confname} = {
            repository => $opts->{repo},
            snapshot => $opts->{snapshot},
            archive => "$ds.img.fidx",
        };
        $pbs_backing->{$confname}->{keyfile} = $opts->{keyfile}
            if defined($opts->{keyfile}) && -e $opts->{keyfile};
        $pbs_backing->{$confname}->{namespace} = $opts->{namespace} if defined($opts->{namespace});

        my $drive = parse_drive($confname, $conf->{$confname});
        print "restoring '$ds' to '$drive->{file}'\n";
    }

    eval {
        # make sure HA doesn't interrupt our restore by stopping the VM
        if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
            run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
        }

        # start VM with backing chain pointing to PBS backup, environment vars for PBS driver
        # in QEMU (PBS_PASSWORD and PBS_FINGERPRINT) are already set by our caller
        vm_start_nolock($storecfg, $vmid, $conf, {paused => 1, 'pbs-backing' => $pbs_backing}, {});

        my $qmeventd_fd = register_qmeventd_handle($vmid);

        # begin streaming, i.e. data copy from PBS to target disk for every vol,
        # this will effectively collapse the backing image chain consisting of
        # [target <- alloc-track -> PBS snapshot] to just [target] (alloc-track
        # removes itself once all backing images vanish with 'auto-remove=on')
        my $jobs = {};
        for my $ds (sort keys %$restored_disks) {
            my $job_id = "restore-$ds";
            mon_cmd($vmid, 'block-stream',
                'job-id' => $job_id,
                device => "$ds",
            );
            $jobs->{$job_id} = {};
        }

        mon_cmd($vmid, 'cont');
        qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');

        print "restore-drive jobs finished successfully, removing all tracking block devices"
            ." to disconnect from Proxmox Backup Server\n";

        for my $ds (sort keys %$restored_disks) {
            mon_cmd($vmid, 'blockdev-del', 'node-name' => "$ds-pbs");
        }

        close($qmeventd_fd);
    };

    my $err = $@;

    if ($err) {
        warn "An error occurred during live-restore: $err\n";
        _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
        die "live-restore failed\n";
    }
}
7301
# Restore VM $vmid from a VMA archive.
#
# $archive - path of the archive, or '-' to read it from STDIN
# $vmid    - ID of the VM to create or overwrite
# $user    - user the restore runs as (passed to the hint parser and the
#            permission check)
# $opts    - hash; keys read here: 'bwlimit', 'unique', 'override_conf',
#            'pool' (the whole hash is also passed to the backup hint parser)
# $comp    - compression type, looked up via decompressor_info(); falsy for
#            uncompressed archives
#
# The archive is piped (optionally through `cstream` for rate limiting and a
# decompressor) into `vma extract`. Once vma has printed the device list and
# the config is extracted, the target volumes are allocated and the device map
# is sent to vma through a FIFO.
#
# Dies on error; volumes allocated up to that point are freed again.
sub restore_vma_archive {
    my ($archive, $vmid, $user, $opts, $comp) = @_;

    my $readfrom = $archive;

    my $cfg = PVE::Storage::config();
    my $commands = [];
    my $bwlimit = $opts->{bwlimit};

    my $dbg_cmdstring = '';
    # append a command to the pipeline; everything after it reads from STDIN
    my $add_pipe = sub {
        my ($cmd) = @_;
        push @$commands, $cmd;
        $dbg_cmdstring .= ' | ' if length($dbg_cmdstring);
        $dbg_cmdstring .= PVE::Tools::cmd2string($cmd);
        $readfrom = '-';
    };

    my $input = undef;
    if ($archive eq '-') {
        $input = '<&STDIN';
    } else {
        # If we use a backup from a PVE defined storage we also consider that
        # storage's rate limit:
        my (undef, $volid) = PVE::Storage::path_to_volume_id($cfg, $archive);
        if (defined($volid)) {
            my ($sid, undef) = PVE::Storage::parse_volume_id($volid);
            my $readlimit = PVE::Storage::get_bandwidth_limit('restore', [$sid], $bwlimit);
            if ($readlimit) {
                print STDERR "applying read rate limit: $readlimit\n";
                my $cstream = ['cstream', '-t', $readlimit*1024, '--', $readfrom];
                $add_pipe->($cstream);
            }
        }
    }

    if ($comp) {
        my $info = PVE::Storage::decompressor_info('vma', $comp);
        my $cmd = $info->{decompressor};
        push @$cmd, $readfrom;
        $add_pipe->($cmd);
    }

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { warn "got interrupt - ignored\n"; };

    my $mapfifo = "/var/tmp/vzdumptmp$$.fifo";
    # Remove a stale leftover (same PID, earlier crashed run) and make sure we
    # really have a FIFO - otherwise the open() below would silently create a
    # regular file instead.
    unlink $mapfifo;
    POSIX::mkfifo($mapfifo, 0600)
        or die "unable to create fifo '$mapfifo' - $!\n";
    my $fifofh;
    my $openfifo = sub { open($fifofh, '>', $mapfifo) or die $! };

    $add_pipe->(['vma', 'extract', '-v', '-r', $mapfifo, $readfrom, $tmpdir]);

    my $oldtimeout;
    my $timeout = 5;

    my $devinfo = {}; # info about drives included in backup
    my $virtdev_hash = {}; # info about allocated drives

    my $rpcenv = PVE::RPCEnvironment::get();

    my $conffile = PVE::QemuConfig->config_file($vmid);

    # Note: $oldconf is undef if VM does not exist
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my %storage_limits;

    # allocate the target volumes, send the device map to vma via the FIFO and
    # build the new raw config
    my $print_devmap = sub {
        my $cfgfn = "$tmpdir/qemu-server.conf";

        # we can read the config - that is already extracted
        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        my $fwcfgfn = "$tmpdir/qemu-server.fw";
        if (-f $fwcfgfn) {
            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($fwcfgfn, "${pve_firewall_dir}/$vmid.fw");
        }

        $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $cfg, $fh, $devinfo, $opts);

        foreach my $info (values %{$virtdev_hash}) {
            my $storeid = $info->{storeid};
            next if defined($storage_limits{$storeid});

            my $limit = PVE::Storage::get_bandwidth_limit('restore', [$storeid], $bwlimit) // 0;
            print STDERR "rate limit for storage $storeid: $limit KiB/s\n" if $limit;
            $storage_limits{$storeid} = $limit * 1024;
        }

        foreach my $devname (keys %$devinfo) {
            die "found no device mapping information for device '$devname'\n"
                if !$devinfo->{$devname}->{virtdev};
        }

        # create empty/temp config
        if ($oldconf) {
            PVE::Tools::file_set_contents($conffile, "memory: 128\n");
            $restore_cleanup_oldconf->($cfg, $vmid, $oldconf, $virtdev_hash);
        }

        # allocate volumes
        my $map = $restore_allocate_devices->($cfg, $virtdev_hash, $vmid);

        # print restore information to $fifofh
        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            my $storeid = $d->{storeid};
            my $volid = $d->{volid};

            my $map_opts = '';
            if (my $limit = $storage_limits{$storeid}) {
                $map_opts .= "throttling.bps=$limit:throttling.group=$storeid:";
            }

            my $write_zeros = 1;
            if (PVE::Storage::volume_has_feature($cfg, 'sparseinit', $volid)) {
                $write_zeros = 0;
            }

            my $path = PVE::Storage::path($cfg, $volid);

            print $fifofh "${map_opts}format=$d->{format}:${write_zeros}:$d->{devname}=$path\n";

            print "map '$d->{devname}' to '$path' (write zeros = ${write_zeros})\n";
        }

        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $fh->close();
    };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };
        local $SIG{ALRM} = sub { die "got timeout\n"; };

        $oldtimeout = alarm($timeout);

        my $parser = sub {
            my $line = shift;

            print "$line\n";

            if ($line =~ m/^DEV:\sdev_id=(\d+)\ssize:\s(\d+)\sdevname:\s(\S+)$/) {
                my ($dev_id, $size, $devname) = ($1, $2, $3);
                $devinfo->{$devname} = { size => $size, dev_id => $dev_id };
            } elsif ($line =~ m/^CTIME: /) {
                # we correctly received the vma config, so we can disable
                # the timeout now for disk allocation (set to 10 minutes, so
                # that we always timeout if something goes wrong)
                alarm(600);
                $print_devmap->();
                print $fifofh "done\n";
                my $tmp = $oldtimeout || 0;
                $oldtimeout = undef;
                alarm($tmp);
                close($fifofh);
                $fifofh = undef;
            }
        };

        print "restore vma archive: $dbg_cmdstring\n";
        run_command($commands, input => $input, outfunc => $parser, afterfork => $openfifo);
    };
    my $err = $@;

    # still set if the CTIME line was never seen
    alarm($oldtimeout) if $oldtimeout;

    $restore_deactivate_volumes->($cfg, $virtdev_hash);

    close($fifofh) if $fifofh;
    unlink $mapfifo;
    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($cfg, $virtdev_hash);
        die $err;
    }

    my $new_conf = $restore_merge_config->($conffile, $new_conf_raw, $opts->{override_conf});
    check_restore_permissions($rpcenv, $user, $new_conf);
    PVE::QemuConfig->write_config($vmid, $new_conf);

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $opts->{pool}) if $opts->{pool};
}
7518
# Restore VM $vmid from a legacy tar based vzdump archive.
#
# $archive - path of the tar archive, or '-' to read it from STDIN
# $vmid    - ID of the VM to create or overwrite
# $user    - user the restore runs as (exported as VZDUMP_USER)
# $opts    - hash; keys read here: 'override_conf' (must be empty),
#            'storage', 'pool', 'prealloc', 'info', 'unique'
#
# The archive members are handed to qmextract via tar's --to-command; the
# volume mapping written by it to "qmrestore.stat" is then used to rewrite
# the config. Dies on error after cleaning up via the stat file.
sub restore_tar_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if (scalar(keys $opts->{override_conf}->%*) > 0) {
        my $keystring = join(' ', keys $opts->{override_conf}->%*);
        die "cannot pass along options ($keystring) when restoring from tar archive\n";
    }

    if ($archive ne '-') {
        my $firstfile = tar_archive_read_firstfile($archive);
        die "ERROR: file '$archive' does not look like a QemuServer vzdump backup\n"
            if $firstfile ne 'qemu-server.conf';
    }

    my $storecfg = PVE::Storage::config();

    # avoid zombie disks when restoring over an existing VM -> cleanup first
    # pass keep_empty_config=1 to keep the config (thus VMID) reserved for us
    # skiplock=1 because qmrestore has set the 'create' lock itself already
    my $vmcfgfn = PVE::QemuConfig->config_file($vmid);
    destroy_vm($storecfg, $vmid, 1, { lock => 'restore' }) if -f $vmcfgfn;

    my $tocmd = "/usr/lib/qemu-server/qmextract";

    $tocmd .= " --storage " . PVE::Tools::shellquote($opts->{storage}) if $opts->{storage};
    $tocmd .= " --pool " . PVE::Tools::shellquote($opts->{pool}) if $opts->{pool};
    $tocmd .= ' --prealloc' if $opts->{prealloc};
    $tocmd .= ' --info' if $opts->{info};

    # tar option "xf" does not autodetect compression when read from STDIN,
    # so we pipe to zcat
    my $cmd = "zcat -f|tar xf " . PVE::Tools::shellquote($archive) . " " .
        PVE::Tools::shellquote("--to-command=$tocmd");

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    mkpath $tmpdir;

    local $ENV{VZDUMP_TMPDIR} = $tmpdir;
    local $ENV{VZDUMP_VMID} = $vmid;
    local $ENV{VZDUMP_USER} = $user;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    my $new_conf_raw = '';

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        if ($archive eq '-') {
            print "extracting archive from STDIN\n";
            run_command($cmd, input => "<&STDIN");
        } else {
            print "extracting archive '$archive'\n";
            run_command($cmd);
        }

        # NOTE: this only leaves the eval block, not the function
        return if $opts->{info};

        # read new mapping
        my $map = {};
        my $statfile = "$tmpdir/qmrestore.stat";
        if (my $fd = IO::File->new($statfile, "r")) {
            while (defined (my $line = <$fd>)) {
                if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                    $map->{$1} = $2 if $1;
                } else {
                    print STDERR "unable to parse line in statfile - $line\n";
                }
            }
            $fd->close();
        }

        my $confsrc = "$tmpdir/qemu-server.conf";

        my $srcfd = IO::File->new($confsrc, "r") || die "unable to open file '$confsrc'\n";

        my $cookie = { netcount => 0 };
        while (defined (my $line = <$srcfd>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $srcfd->close();
    };
    if (my $err = $@) {
        tar_restore_cleanup($storecfg, "$tmpdir/qmrestore.stat") if !$opts->{info};
        # the stat file lives inside $tmpdir, so remove the directory only after
        # the cleanup has consumed it - do not leak it on the error path
        rmtree $tmpdir;
        die $err;
    }

    rmtree $tmpdir;

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;
}
7631
# Call $func once, with the storage ID as only argument, for every storage
# referenced by a non-CD-ROM volume in $conf. The storage IDs are visited in
# sorted order; volumes whose ID cannot be parsed are ignored.
sub foreach_storage_used_by_vm {
    my ($conf, $func) = @_;

    my %used;

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive);

        # second argument set: do not die on unparsable volume IDs
        my ($sid) = PVE::Storage::parse_volume_id($drive->{file}, 1);
        $used{$sid} = $sid if $sid;
    });

    for my $sid (sort keys %used) {
        $func->($sid);
    }
}
7651
# storage types for which snapshots are done by QEMU itself
my $qemu_snap_storage = {
    rbd => 1,
};

# Decide whether a snapshot of $volid has to be taken by QEMU.
#
# Returns 1 for volumes on a storage type listed in $qemu_snap_storage (unless
# the storage has 'krbd' set) and for qcow2/qed image files. Returns nothing
# otherwise, and always for a TPM state device. Dies if the storage of $volid
# is not part of $storecfg.
sub do_snapshots_with_qemu {
    my ($storecfg, $volid, $deviceid) = @_;

    return if $deviceid =~ m/tpmstate0/;

    my $storage_name = PVE::Storage::parse_volume_id($volid);
    my $scfg = $storecfg->{ids}->{$storage_name};
    die "could not find storage '$storage_name'\n" if !defined($scfg);

    return 1 if $qemu_snap_storage->{$scfg->{type}} && !$scfg->{krbd};
    return 1 if $volid =~ m/\.(qcow2|qed)$/;

    return;
}
7674
# Ping the QEMU guest agent of $vmid (3 second timeout).
#
# Returns 1 if the agent answered, 0 otherwise. Unless $nowarn is set, the
# failure reason is emitted as a warning.
sub qga_check_running {
    my ($vmid, $nowarn) = @_;

    eval { mon_cmd($vmid, "guest-ping", timeout => 3); };
    my $err = $@;
    return 1 if !$err;

    warn "QEMU Guest Agent is not running - $err" if !$nowarn;
    return 0;
}
7685
# Convert the volumes of $conf into base volumes.
#
# CD-ROM drives and volumes without the 'template' feature are skipped. When
# $disk is given, only that config key is converted. The config is written
# after each converted volume.
sub template_create {
    my ($vmid, $conf, $disk) = @_;

    my $storecfg = PVE::Storage::config();

    my $convert = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);
        return if $disk && $ds ne $disk;

        my $volid = $drive->{file};
        return if !PVE::Storage::volume_has_feature($storecfg, 'template', $volid);

        $drive->{file} = PVE::Storage::vdisk_create_base($storecfg, $volid);
        $conf->{$ds} = print_drive($drive);
        PVE::QemuConfig->write_config($vmid, $conf);
    };

    PVE::QemuConfig->foreach_volume($conf, $convert);
}
7706
# Translate an 'iscsi://<portal>/<target>/<lun>' URL into the option string
# form ('file.driver=iscsi,...') that is used together with qemu-img's
# --image-opts / --target-image-opts.
#
# Dies if $path does not have the expected shape.
sub convert_iscsi_path {
    my ($path) = @_;

    if ($path =~ m|^iscsi://([^/]+)/([^/]+)/(.+)$|) {
        my ($portal, $target, $lun) = ($1, $2, $3);

        my $initiator_name = get_initiator_name();

        return "file.driver=iscsi,file.transport=tcp,file.initiator-name=$initiator_name,".
            "file.portal=$portal,file.target=$target,file.lun=$lun,driver=raw";
    }

    die "cannot convert iscsi path '$path', unknown format\n";
}
7723
# Copy a volume (or plain image file / block device) to an already allocated
# destination volume using `qemu-img convert`.
#
# $src_volid           - source volume ID, or path of a file / block device
# $dst_volid           - destination volume ID (must be a valid volume ID)
# $size                - size used to render the progress messages
# $snapname            - optional snapshot of the source to copy from
# $is_zero_initialized - if set (and the target is not iSCSI) the target is
#                        accessed through the 'zeroinit:' driver prefix
# $bwlimit             - optional rate limit, passed as "${bwlimit}K" to -r
#
# Dies with "copy failed: ..." if qemu-img fails.
sub qemu_img_convert {
    my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized, $bwlimit) = @_;

    my $storecfg = PVE::Storage::config();
    my ($src_storeid, $src_volname) = PVE::Storage::parse_volume_id($src_volid, 1);
    my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid, 1);

    die "destination '$dst_volid' is not a valid volid for qemu-img convert\n" if !$dst_storeid;

    my $cachemode;
    my $src_path;
    my $src_is_iscsi = 0;
    my $src_format;

    if ($src_storeid) {
        PVE::Storage::activate_volumes($storecfg, [$src_volid], $snapname);
        my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
        $src_format = qemu_img_format($src_scfg, $src_volname);
        $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
        $src_is_iscsi = ($src_path =~ m|^iscsi://|);
        $cachemode = 'none' if $src_scfg->{type} eq 'zfspool';
    } elsif (-f $src_volid || -b $src_volid) {
        # plain file or block device: derive the format from the file extension
        $src_path = $src_volid;
        if ($src_path =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/) {
            $src_format = $1;
        }
    }

    die "source '$src_volid' is not a valid volid nor path for qemu-img convert\n" if !$src_path;

    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
    my $dst_format = qemu_img_format($dst_scfg, $dst_volname);
    my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
    my $dst_is_iscsi = ($dst_path =~ m|^iscsi://|);

    my $cmd = [];
    push @$cmd, '/usr/bin/qemu-img', 'convert', '-p', '-n';
    push @$cmd, '-l', "snapshot.name=$snapname"
        if $snapname && $src_format && $src_format eq "qcow2";
    push @$cmd, '-t', 'none' if $dst_scfg->{type} eq 'zfspool';
    push @$cmd, '-T', $cachemode if defined($cachemode);
    push @$cmd, '-r', "${bwlimit}K" if defined($bwlimit);

    if ($src_is_iscsi) {
        push @$cmd, '--image-opts';
        $src_path = convert_iscsi_path($src_path);
    } elsif ($src_format) {
        push @$cmd, '-f', $src_format;
    }

    if ($dst_is_iscsi) {
        push @$cmd, '--target-image-opts';
        $dst_path = convert_iscsi_path($dst_path);
    } else {
        push @$cmd, '-O', $dst_format;
    }

    push @$cmd, $src_path;

    if (!$dst_is_iscsi && $is_zero_initialized) {
        push @$cmd, "zeroinit:$dst_path";
    } else {
        push @$cmd, $dst_path;
    }

    # turn qemu-img's "(NN.NN/100%)" progress output into readable messages
    my $parser = sub {
        my $line = shift;
        if ($line =~ m/\((\S+)\/100\%\)/) {
            my $percent = $1;
            my $transferred = int($size * $percent / 100);
            my $total_h = render_bytes($size, 1);
            my $transferred_h = render_bytes($transferred, 1);

            print "transferred $transferred_h of $total_h ($percent%)\n";
        }
    };

    eval { run_command($cmd, timeout => undef, outfunc => $parser); };
    my $err = $@;
    die "copy failed: $err" if $err;
}
7806
# Determine the image format of $volname on the storage described by $scfg.
#
# For storages with a 'path' the format is taken from the file extension when
# it is one of the known QEMU formats; everything else is reported as "raw".
sub qemu_img_format {
    my ($scfg, $volname) = @_;

    return "raw" if !$scfg->{path};
    return $1 if $volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/;

    return "raw";
}
7816
# Start a 'drive-mirror' block job for "drive-$drive" of VM $vmid and hand over
# to qemu_drive_mirror_monitor() to wait for it.
#
# $dst_volid is either an 'nbd:' target, used as is, or a volume ID that is
# resolved to a path (prefixed with 'zeroinit:' if $is_zero_initialized).
# $jobs collects the job entries (a fresh hash is used if none is passed),
# $bwlimit is multiplied by 1024 and passed as 'speed', and $src_bitmap
# switches the job to incremental sync using that dirty bitmap.
#
# If the job cannot be started, the jobs in $jobs are cancelled and the
# function dies with "mirroring error: ...".
sub qemu_drive_mirror {
    my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit, $src_bitmap) = @_;

    $jobs = {} if !$jobs;

    my $device = "drive-$drive";
    $jobs->{$device} = {};

    my ($qemu_target, $format);
    if ($dst_volid =~ /^nbd:/) {
        ($qemu_target, $format) = ($dst_volid, "nbd");
    } else {
        my $storecfg = PVE::Storage::config();
        my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid);

        my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

        $format = qemu_img_format($dst_scfg, $dst_volname);

        my $dst_path = PVE::Storage::path($storecfg, $dst_volid);

        $qemu_target = $is_zero_initialized ? "zeroinit:$dst_path" : $dst_path;
    }

    my $opts = {
        timeout => 10,
        device => $device,
        mode => "existing",
        sync => "full",
        target => $qemu_target,
    };
    $opts->{format} = $format if $format;

    if (defined($src_bitmap)) {
        $opts->{sync} = 'incremental';
        $opts->{bitmap} = $src_bitmap;
        print "drive mirror re-using dirty bitmap '$src_bitmap'\n";
    }

    if (defined($bwlimit)) {
        $opts->{speed} = $bwlimit * 1024;
        print "drive mirror is starting for drive-$drive with bandwidth limit: ${bwlimit} KB/s\n";
    } else {
        print "drive mirror is starting for drive-$drive\n";
    }

    # if a job already runs for this device we get an error, catch it for cleanup
    eval { mon_cmd($vmid, "drive-mirror", %$opts); };
    if (my $err = $@) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@;
        die "mirroring error: $err\n";
    }

    qemu_drive_mirror_monitor($vmid, $vmiddst, $jobs, $completion, $qga);
}
7868
# Poll the block jobs listed in $jobs until they are finished or ready, then finalize them.
#
# Parameters:
#   $vmid       - VM whose monitor is queried
#   $vmiddst    - target VM ID; if set and different from $vmid, ready jobs are cancelled instead
#                 of completed (with the guest filesystem frozen or the VM suspended meanwhile)
#   $jobs       - hash ref keyed by job (device) ID; finished jobs are deleted from it, 'ready'
#                 and 'complete' flags are recorded on the remaining entries
#   $completion - see below, defaults to 'complete'
#   $qga        - if true and qga_check_running() succeeds, fsfreeze is used instead of suspend
#   $op         - block job type to watch, defaults to 'mirror'
#
# $completion can be either
# 'complete': wait until all jobs are ready, block-job-complete them (default)
# 'cancel': wait until all jobs are ready, block-job-cancel them
# 'skip': wait until all jobs are ready, return with block jobs in ready state
# 'auto': wait until all jobs disappear, only use for jobs which complete automatically
#
# On any error the remaining jobs are cancelled and the sub dies with
# "block job ($op) error: ...".
sub qemu_drive_mirror_monitor {
    my ($vmid, $vmiddst, $jobs, $completion, $qga, $op) = @_;

    $completion //= 'complete';
    $op //= "mirror";

    eval {
        # counts failed completion attempts that are worth retrying
        my $err_complete = 0;

        my $starttime = time();
        while (1) {
            die "block job ('$op') timed out\n" if $err_complete > 300;

            my $stats = mon_cmd($vmid, "query-block-jobs");
            my $ctime = time();

            # only look at jobs of the requested type
            my $running_jobs = {};
            for my $stat (@$stats) {
                next if $stat->{type} ne $op;
                $running_jobs->{$stat->{device}} = $stat;
            }

            my $readycounter = 0;

            for my $job_id (sort keys %$jobs) {
                my $job = $running_jobs->{$job_id};

                my $vanished = !defined($job);
                my $complete = defined($jobs->{$job_id}->{complete}) && $vanished;
                if ($complete || ($vanished && $completion eq 'auto')) {
                    print "$job_id: $op-job finished\n";
                    delete $jobs->{$job_id};
                    next;
                }

                # a job that disappeared without us completing it must have been cancelled
                die "$job_id: '$op' has been cancelled\n" if !defined($job);

                my $busy = $job->{busy};
                my $ready = $job->{ready};
                if (my $total = $job->{len}) {
                    my $transferred = $job->{offset} || 0;
                    my $remaining = $total - $transferred;
                    my $percent = sprintf "%.2f", ($transferred * 100 / $total);

                    my $duration = $ctime - $starttime;
                    my $total_h = render_bytes($total, 1);
                    my $transferred_h = render_bytes($transferred, 1);

                    my $status = sprintf(
                        "transferred $transferred_h of $total_h ($percent%%) in %s",
                        render_duration($duration),
                    );

                    if ($ready) {
                        if ($busy) {
                            $status .= ", still busy"; # shouldn't even happen? but mirror is weird
                        } else {
                            $status .= ", ready";
                        }
                    }
                    # stop repeating the progress line once the job was seen as ready
                    print "$job_id: $status\n" if !$jobs->{$job_id}->{ready};
                    $jobs->{$job_id}->{ready} = $ready;
                }

                $readycounter++ if $job->{ready};
            }

            last if scalar(keys %$jobs) == 0;

            if ($readycounter == scalar(keys %$jobs)) {
                print "all '$op' jobs are ready\n";

                # do the complete later (or has already been done)
                last if $completion eq 'skip' || $completion eq 'auto';

                if ($vmiddst && $vmiddst != $vmid) {
                    my $agent_running = $qga && qga_check_running($vmid);
                    if ($agent_running) {
                        print "freeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-freeze"); };
                        warn $@ if $@;
                    } else {
                        print "suspend vm\n";
                        eval { PVE::QemuServer::vm_suspend($vmid, 1); };
                        warn $@ if $@;
                    }

                    # if we clone a disk for a new target vm, we don't switch the disk
                    PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs);

                    if ($agent_running) {
                        print "unfreeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-thaw"); };
                        warn $@ if $@;
                    } else {
                        print "resume vm\n";
                        eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
                        warn $@ if $@;
                    }

                    last;
                } else {

                    for my $job_id (sort keys %$jobs) {
                        # try to switch the disk if source and destination are on the same guest
                        print "$job_id: Completing block job_id...\n";

                        my $completion_command;
                        if ($completion eq 'complete') {
                            $completion_command = 'block-job-complete';
                        } elsif ($completion eq 'cancel') {
                            $completion_command = 'block-job-cancel';
                        } else {
                            die "invalid completion value: $completion\n";
                        }
                        eval { mon_cmd($vmid, $completion_command, device => $job_id) };
                        my $completion_err = $@;
                        if ($completion_err && $completion_err =~ m/cannot be completed/) {
                            print "$job_id: block job cannot be completed, trying again.\n";
                            $err_complete++;
                        } elsif ($completion_err) {
                            # any other failure is not retried and must not be reported as success
                            die "$job_id: block job cannot be completed - $completion_err\n";
                        } else {
                            print "$job_id: Completed successfully.\n";
                            $jobs->{$job_id}->{complete} = 1;
                        }
                    }
                }
            }
            sleep 1;
        }
    };
    my $err = $@;

    if ($err) {
        # best-effort cleanup; report a failing cleanup, but die with the original error
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@;
        die "block job ($op) error: $err";
    }
}
8010
# Send 'block-job-cancel' for every job in $jobs and then poll 'query-block-jobs' once per second
# until none of them is listed anymore. Entries are deleted from $jobs as they disappear.
sub qemu_blockjobs_cancel {
    my ($vmid, $jobs) = @_;

    for my $job_id (keys %$jobs) {
        print "$job_id: Cancelling block job\n";
        # errors of the cancel command itself are deliberately ignored here
        eval { mon_cmd($vmid, "block-job-cancel", device => $job_id); };
        $jobs->{$job_id}->{cancel} = 1;
    }

    for (;;) {
        my $stats = mon_cmd($vmid, "query-block-jobs");

        my %still_listed;
        for my $stat (@$stats) {
            $still_listed{$stat->{device}} = $stat;
        }

        for my $job_id (keys %$jobs) {
            next if !defined($jobs->{$job_id}->{cancel});
            next if defined($still_listed{$job_id});

            print "$job_id: Done.\n";
            delete $jobs->{$job_id};
        }

        last if scalar(keys %$jobs) == 0;

        sleep 1;
    }
}
8041
# Check for bug #4525: drive-mirror will open the target drive with the same aio setting as the
# source, but some storages have problems with io_uring, sometimes even leading to crashes.
#
# Dies if the source drive uses io_uring (explicitly or via the storage default) while the target
# storage does not allow io_uring by default. Does nothing unless drive-mirror is used and the
# storage actually changes.
my sub clone_disk_check_io_uring {
    my ($src_drive, $storecfg, $src_storeid, $dst_storeid, $use_drive_mirror) = @_;

    # Don't complain when not changing storage: assume if it works for the source, it'll work
    # for the target too.
    return if !$use_drive_mirror || $src_storeid eq $dst_storeid;

    my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

    my $cache_direct = drive_uses_cache_direct($src_drive);

    # an explicit aio setting on the drive wins over the storage default
    my $src_uses_io_uring = $src_drive->{aio}
        ? $src_drive->{aio} eq 'io_uring'
        : storage_allows_io_uring_default($src_scfg, $cache_direct);

    return if !$src_uses_io_uring;
    return if storage_allows_io_uring_default($dst_scfg, $cache_direct);

    die "target storage is known to cause issues with aio=io_uring (used by current drive)\n";
}
8068
# Clone one drive, either as linked clone or as full copy.
#
# $source is a hash ref with the keys vmid, running, drivename, drive and snapname, $dest one
# with the keys vmid, drivename, efisize, storage and format. Newly allocated volume IDs are
# pushed onto $newvollist. $jobs, $completion, $qga and $bwlimit are passed on to the
# drive-mirror helpers. Returns a deep copy of the source drive hash that points to the new
# volume (with 'size' set if it could be determined).
sub clone_disk {
    my ($storecfg, $source, $dest, $full, $newvollist, $jobs, $completion, $qga, $bwlimit) = @_;

    my ($vmid, $running, $src_drivename, $drive, $snapname) =
        $source->@{qw(vmid running drivename drive snapname)};
    my ($newvmid, $dst_drivename, $efisize, $storage, $format) =
        $dest->@{qw(vmid drivename efisize storage format)};

    # a full clone of a named drive of a running VM, not from a snapshot, goes via drive-mirror
    my $use_drive_mirror = $full && $running && $src_drivename && !$snapname;

    if ($src_drivename && $dst_drivename && $src_drivename ne $dst_drivename) {
        my $involves = sub { $src_drivename eq $_[0] || $dst_drivename eq $_[0] };

        die "cloning from/to EFI disk requires EFI disk\n" if $involves->('efidisk0');
        die "cloning from/to TPM state requires TPM state\n" if $involves->('tpmstate0');

        # This would lead to two device nodes in QEMU pointing to the same backing image!
        die "cannot change drive name when cloning disk from/to the same VM\n"
            if $use_drive_mirror && $vmid == $newvmid;
    }

    die "cannot move TPM state while VM is running\n"
        if $use_drive_mirror && $src_drivename eq 'tpmstate0';

    my $clone_kind = $full ? 'full' : 'linked';
    my $drive_label = $src_drivename ? "$src_drivename " : '';
    print "create ${clone_kind} clone of drive ${drive_label}($drive->{file})\n";

    my $newvolid;

    if (!$full) {
        $newvolid = PVE::Storage::vdisk_clone($storecfg, $drive->{file}, $newvmid, $snapname);
        push @$newvollist, $newvolid;
    } else {
        my ($src_storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        my $storeid = $storage || $src_storeid;

        my $dst_format = resolve_dst_disk_format($storecfg, $storeid, $volname, $format);

        # work out name and size of the volume to allocate, depending on the kind of drive
        my ($name, $size);
        if (drive_is_cloudinit($drive)) {
            $name = "vm-$newvmid-cloudinit";
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $name .= ".$dst_format" if $scfg->{path};
            $snapname = undef;
            $size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE;
        } elsif ($dst_drivename eq 'efidisk0') {
            $size = $efisize or die "internal error - need to specify EFI disk size\n";
        } elsif ($dst_drivename eq 'tpmstate0') {
            $dst_format = 'raw';
            $size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
        } else {
            clone_disk_check_io_uring($drive, $storecfg, $src_storeid, $storeid, $use_drive_mirror);

            $size = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
        }

        $newvolid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $newvmid, $dst_format, $name, ($size/1024)
        );
        push @$newvollist, $newvolid;

        PVE::Storage::activate_volumes($storecfg, [$newvolid]);

        if (drive_is_cloudinit($drive)) {
            # No data is copied for a cloudinit drive.
            # when cloning multiple disks (e.g. during clone_vm) it might be the last disk
            # if this is the case, we have to complete any block-jobs still there from
            # previous drive-mirrors
            if (($completion eq 'complete') && (scalar(keys %$jobs) > 0)) {
                qemu_drive_mirror_monitor($vmid, $newvmid, $jobs, $completion, $qga);
            }
        } else {
            my $sparseinit = PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $newvolid);

            if ($use_drive_mirror) {
                qemu_drive_mirror(
                    $vmid,
                    $src_drivename,
                    $newvolid,
                    $newvmid,
                    $sparseinit,
                    $jobs,
                    $completion,
                    $qga,
                    $bwlimit,
                );
            } elsif ($dst_drivename eq 'efidisk0') {
                # the relevant data on the efidisk may be smaller than the source
                # e.g. on RBD/ZFS, so we use dd to copy only the amount
                # that is given by the OVMF_VARS.fd
                my $src_path = PVE::Storage::path($storecfg, $drive->{file}, $snapname);
                my $dst_path = PVE::Storage::path($storecfg, $newvolid);

                my $src_format = (PVE::Storage::parse_volname($storecfg, $drive->{file}))[6];

                # better for Ceph if block size is not too small, see bug #3324
                my $bs = 1024*1024;

                my @snapshot_opts;
                if ($src_format eq 'qcow2' && $snapname) {
                    die "cannot clone qcow2 EFI disk snapshot - requires QEMU >= 6.2\n"
                        if !min_version(kvm_user_version(), 6, 2);
                    @snapshot_opts = ('-l', $snapname);
                }

                run_command([
                    'qemu-img', 'dd', '-n', '-O', $dst_format,
                    @snapshot_opts,
                    "bs=$bs", "osize=$size", "if=$src_path", "of=$dst_path",
                ]);
            } else {
                qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit, $bwlimit);
            }
        }
    }

    # best effort: a failing size query simply leaves 'size' as copied from the source drive
    my $size = eval { PVE::Storage::volume_size_info($storecfg, $newvolid, 10) };

    my $disk = dclone($drive);
    delete $disk->{format};
    $disk->{file} = $newvolid;
    $disk->{size} = $size if defined($size);

    return $disk;
}
8188
# Ask the running VM via 'query-version' and return its QEMU version as "major.minor".
sub get_running_qemu_version {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, "query-version");
    my $qemu = $res->{qemu};

    return sprintf('%s.%s', $qemu->{major}, $qemu->{minor});
}
8194
# Decide whether the old (non-EFI) PXE BIOS files have to be used for a machine type.
#
# Returns the list ($use_old_bios_files, $machine_type), where a trailing '.pxe' has been
# stripped from the machine type. Returns nothing if no machine type is given.
sub qemu_use_old_bios_files {
    my ($machine_type) = @_;

    return if !$machine_type;

    # an explicit '.pxe' suffix always selects the old files
    if ($machine_type =~ m/^(\S+)\.pxe$/) {
        my $base_type = $1;
        return (1, $base_type);
    }

    my $version = extract_version($machine_type, kvm_user_version());
    # Note: kvm version < 2.4 use non-efi pxe files, and have problems when we
    # load new efi bios files on migration. So this hack is required to allow
    # live migration from qemu-2.2 to qemu-2.4, which is sometimes used when
    # upgrading from proxmox-ve-3.X to proxmox-ve 4.0
    my $use_old_bios_files = !min_version($version, 2, 4);

    return ($use_old_bios_files, $machine_type);
}
8216
# Return the size in bytes (as reported by -s) of the OVMF vars file that get_ovmf_files()
# selects for this config. $efidisk is optional and defaults to the parsed 'efidisk0' entry.
sub get_efivars_size {
    my ($conf, $efidisk) = @_;

    my $arch = get_vm_arch($conf);

    if (!defined($efidisk) && $conf->{efidisk0}) {
        $efidisk = parse_drive('efidisk0', $conf->{efidisk0});
    }

    my $smm = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $ovmf_vars = (get_ovmf_files($arch, $efidisk, $smm))[1];

    return -s $ovmf_vars;
}
8226
# Rewrite the 'efidisk0' entry of $conf in place with its size set to get_efivars_size().
# Does nothing if no EFI disk is configured.
sub update_efidisk_size {
    my ($conf) = @_;

    if (defined(my $efidisk_conf = $conf->{efidisk0})) {
        my $disk = PVE::QemuServer::parse_drive('efidisk0', $efidisk_conf);
        $disk->{size} = get_efivars_size($conf);
        $conf->{efidisk0} = print_drive($disk);
    }

    return;
}
8238
# Rewrite the 'tpmstate0' entry of $conf in place with its size set to TPMSTATE_DISK_SIZE.
# Does nothing if no TPM state is configured (same guard as update_efidisk_size), so that no
# entry is created from an undefined property string.
sub update_tpmstate_size {
    my ($conf) = @_;

    return if !defined($conf->{tpmstate0});

    my $disk = PVE::QemuServer::parse_drive('tpmstate0', $conf->{tpmstate0});
    $disk->{size} = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
    $conf->{tpmstate0} = print_drive($disk);

    return;
}
8246
# Allocate a new volume sized after the OVMF vars file selected by get_ovmf_files(), activate it
# and fill it via qemu_img_convert(). Returns the list ($volid, $size/1024), with $size taken
# from volume_size_info() of the new volume.
sub create_efidisk($$$$$$$) {
    my ($storecfg, $storeid, $vmid, $fmt, $arch, $efidisk, $smm) = @_;

    my $ovmf_vars = (get_ovmf_files($arch, $efidisk, $smm))[1];

    my $vars_bytes = -s $ovmf_vars;
    my $vars_kb = PVE::Tools::convert_size($vars_bytes, 'b' => 'kb');

    my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $fmt, undef, $vars_kb);
    PVE::Storage::activate_volumes($storecfg, [$volid]);

    qemu_img_convert($ovmf_vars, $volid, $vars_bytes, undef, 0);

    my $size = PVE::Storage::volume_size_info($storecfg, $volid, 3);

    return ($volid, $size/1024);
}
8262
# Query 'query-iothreads' and return a hash ref mapping each iothread 'id' to its 'thread-id'.
sub vm_iothreads_list {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, 'query-iothreads');

    my %thread_id_of;
    for my $entry (@$res) {
        my $id = $entry->{id};
        $thread_id_of{$id} = $entry->{"thread-id"};
    }

    return \%thread_id_of;
}
8275
# Compute the SCSI controller placement for a drive.
#
# Returns the list ($maxdev, $controller, $controller_prefix): the number of devices per
# controller for the configured 'scsihw' (7 if unset or lsi*, 1 for virtio-scsi-single, 256
# otherwise), the controller number derived from the drive index, and the controller ID prefix.
sub scsihw_infos {
    my ($conf, $drive) = @_;

    my $scsihw = $conf->{scsihw};
    my $is_single = $scsihw && $scsihw eq 'virtio-scsi-single';

    my $maxdev;
    if (!$scsihw || $scsihw =~ m/^lsi/) {
        $maxdev = 7;
    } else {
        $maxdev = $is_single ? 1 : 256;
    }

    my $controller = int($drive->{index} / $maxdev);
    my $controller_prefix = $is_single ? "virtioscsi" : "scsihw";

    return ($maxdev, $controller, $controller_prefix);
}
8296
# Pick the disk format for a volume to be created on $storeid.
#
# An explicitly requested $format wins; without one, the format derived from $src_volname is
# used as hint, or the storage default if there is no source volume. If the resulting format is
# not among the valid formats of the storage, the storage default is returned instead.
sub resolve_dst_disk_format {
    my ($storecfg, $storeid, $src_volname, $format) = @_;

    my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);

    if (!$format) {
        return $defFormat if !$src_volname;

        # if no target format is specified, use the source disk format as hint
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        $format = qemu_img_format($scfg, $src_volname);
    }

    # test if requested format is supported - else use default
    return (grep { $_ eq $format } @$validFormats) ? $format : $defFormat;
}
8316
# NOTE: if this logic changes, please update docs & possibly gui logic
#
# Choose the storage for VM state volumes, in this order of preference:
#   1. 'vmstatestorage' from the config, if set
#   2. a shared storage where the VM has at least one disk
#   3. a local storage where the VM has at least one disk
#   4. the storage named 'local'
# Within 2. and 3., storages with a 'path' (file based) are preferred.
sub find_vmstate_storage {
    my ($conf, $storecfg) = @_;

    return $conf->{vmstatestorage} if $conf->{vmstatestorage};

    my %candidate; # best storage ID seen so far, per kind ('shared' or 'local')

    foreach_storage_used_by_vm($conf, sub {
        my ($sid) = @_;
        my $scfg = PVE::Storage::storage_config($storecfg, $sid);
        my $kind = $scfg->{shared} ? 'shared' : 'local';
        $candidate{$kind} = $sid if !$candidate{$kind} || $scfg->{path}; # prefer file based storage
    });

    return $candidate{shared} // $candidate{local} // 'local';
}
8340
# Generate a new UUID with the UUID module and return its string form.
sub generate_uuid {
    my $uuid_bin;
    UUID::generate($uuid_bin);

    my $uuid_text;
    UUID::unparse($uuid_bin, $uuid_text);

    return $uuid_text;
}
8347
# Return a freshly generated UUID as "uuid=<uuid>" string.
sub generate_smbios1_uuid {
    my $uuid = generate_uuid();
    return "uuid=$uuid";
}
8351
# Send 'nbd-server-stop' to the monitor of the given VM and hand back the monitor's result.
sub nbd_stop {
    my ($vmid) = @_;

    return mon_cmd($vmid, 'nbd-server-stop');
}
8357
# Create the (empty) reboot trigger file /run/qemu-server/<vmid>.reboot.
# Dies if the file cannot be opened for writing.
sub create_reboot_request {
    my ($vmid) = @_;

    my $trigger_file = "/run/qemu-server/$vmid.reboot";

    open(my $fh, '>', $trigger_file)
        or die "failed to create reboot trigger file: $!\n";
    close($fh);
}
8364
# Remove the reboot trigger file /run/qemu-server/<vmid>.reboot.
# Returns the result of unlink(), i.e. a false value if there was no such file.
# Dies if removing fails for any reason other than the file not existing.
sub clear_reboot_request {
    my ($vmid) = @_;

    my $removed = unlink("/run/qemu-server/$vmid.reboot");

    if (!$removed && $! != POSIX::ENOENT) {
        die "could not remove reboot request for $vmid: $!";
    }

    return $removed;
}
8376
# Translate a legacy boot specification (a string of device class letters) into a hash ref
# mapping device names to boot indices.
#
# The letters come from $bootcfg->{legacy}, or from the schema default if that is not set. The
# n-th letter gets the base index n*100; devices of the same class count up from there. For
# class 'c' only the configured 'bootdisk' gets an entry, although every non-CD-ROM volume
# advances the index.
sub bootorder_from_legacy {
    my ($conf, $bootcfg) = @_;

    my $boot = $bootcfg->{legacy} || $boot_fmt->{legacy}->{default};

    my $bootindex_hash = {};
    my $base_index = 0;
    for my $letter (split(//, $boot)) {
        $base_index += 100;
        $bootindex_hash->{$letter} = $base_index;
    }

    my $bootorder = {};

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        if (drive_is_cdrom ($drive, 1)) {
            if ($bootindex_hash->{d}) {
                $bootorder->{$ds} = $bootindex_hash->{d}++;
            }
        } elsif ($bootindex_hash->{c}) {
            if ($conf->{bootdisk} && $conf->{bootdisk} eq $ds) {
                $bootorder->{$ds} = $bootindex_hash->{c};
            }
            $bootindex_hash->{c}++;
        }
    });

    if ($bootindex_hash->{n}) {
        for my $i (0 .. $MAX_NETS - 1) {
            my $netname = "net$i";
            next if !$conf->{$netname};
            $bootorder->{$netname} = $bootindex_hash->{n}++;
        }
    }

    return $bootorder;
}
8416
# Generate default device list for 'boot: order=' property. Matches legacy
# default boot order, but with explicit device names. This is important, since
# the fallback for when neither 'order' nor the old format is specified relies
# on 'bootorder_from_legacy' above, and it would be confusing if this diverges.
#
# Returns an array ref with, in this order and as far as present: the first hard disk, the first
# CD-ROM and the first configured network device.
sub get_default_bootdevices {
    my ($conf) = @_;

    my @devices;

    # harddisk (0) first, then cdrom (1)
    for my $want_cdrom (0, 1) {
        my $disk = PVE::QemuServer::Drive::resolve_first_disk($conf, $want_cdrom);
        push @devices, $disk if $disk;
    }

    # network: only the first configured device
    for my $i (0 .. $MAX_NETS - 1) {
        my $netname = "net$i";
        if ($conf->{$netname}) {
            push @devices, $netname;
            last;
        }
    }

    return \@devices;
}
8444
# Return a hash ref mapping device names to boot indices for the given config.
#
# Without a 'boot' property, or with one in legacy format, the result comes from
# bootorder_from_legacy(). With 'order=', devices are numbered in the listed order.
sub device_bootorder {
    my ($conf) = @_;

    return bootorder_from_legacy($conf) if !defined($conf->{boot});

    my $boot = parse_property_string($boot_fmt, $conf->{boot});

    return bootorder_from_legacy($conf, $boot) if !defined($boot) || $boot->{legacy};

    my $bootorder = {};
    if ($boot->{order}) {
        my $index = 100; # start at 100 to allow user to insert devices before us with -args
        for my $dev (PVE::Tools::split_list($boot->{order})) {
            $bootorder->{$dev} = $index;
            $index++;
        }
    }

    return $bootorder;
}
8464
# Connect to the qmeventd socket and send the vzdump handshake for $vmid.
#
# The connection is attempted up to five times (25ms apart) as long as it fails with EINTR or
# EAGAIN; any other error, or running out of attempts, is fatal. Returns the socket handle,
# which the caller has to close once the inhibit is no longer required.
sub register_qmeventd_handle {
    my ($vmid) = @_;

    my $peer = "/var/run/qmeventd.sock";
    my $fh;

    for (my $count = 1; ; $count++) {
        $fh = IO::Socket::UNIX->new(Peer => $peer, Blocking => 0, Timeout => 1);
        last if $fh;

        die "unable to connect to qmeventd socket (vmid: $vmid) - $!\n"
            if $! != EINTR && $! != EAGAIN;

        die "unable to connect to qmeventd socket (vmid: $vmid) - timeout "
            . "after $count retries\n"
            if $count > 4;

        usleep(25000);
    }

    # send handshake to mark VM as backing up
    print {$fh} to_json({vzdump => {vmid => "$vmid"}});

    # return handle to be closed later when inhibit is no longer required
    return $fh;
}
8492
8493 # bash completion helper
8494
# Completion helper: return an array ref with the volume IDs of all backups whose format is
# 'vma.<compressor>'. If $cvalue starts with "<storage>:", only that storage is listed.
sub complete_backup_archives {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    # restrict the listing to the storage the user already typed, if any
    my ($storeid) = $cvalue =~ m/^([^:]+):/;

    my $data = PVE::Storage::template_list($cfg, $storeid, 'backup');

    my @volids;
    for my $id (keys %$data) {
        for my $item ($data->{$id}->@*) {
            next if $item->{format} !~ m/^vma\.(${\PVE::Storage::Plugin::COMPRESSOR_RE})$/;
            next if !defined($item->{volid});
            push @volids, $item->{volid};
        }
    }

    return \@volids;
}
8518
# Completion helper: return an array ref of VM IDs taken from vmstatus().
#
# With $running undefined all IDs are returned. Otherwise templates are left out, and only
# running ($running true) or only non-running ($running false) VMs are returned.
my $complete_vmid_full = sub {
    my ($running) = @_;

    my $idlist = vmstatus();

    my $wanted = sub {
        my ($d) = @_;

        return 1 if !defined($running);
        return 0 if $d->{template};

        my $is_running = $d->{status} eq 'running';
        return $running ? $is_running : !$is_running;
    };

    return [ grep { $wanted->($idlist->{$_}) } keys %$idlist ];
};
8538
# Completion helper: all VM IDs, regardless of state.
sub complete_vmid {
    return $complete_vmid_full->();
}
8542
# Completion helper: IDs of non-template VMs that are not running.
sub complete_vmid_stopped {
    return $complete_vmid_full->(0);
}
8546
# Completion helper: IDs of non-template VMs that are running.
sub complete_vmid_running {
    return $complete_vmid_full->(1);
}
8550
# Completion helper: return an array ref of the storage IDs that pass storage_check_enabled()
# and have the 'images' content type configured.
sub complete_storage {
    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @usable = grep {
        PVE::Storage::storage_check_enabled($cfg, $_, undef, 1)
            && $ids->{$_}->{content}->{images}
    } keys %$ids;

    return \@usable;
}
8565
# Completion helper: like complete_storage, but storage_check_enabled() is evaluated for the
# target node, which is taken from the second element of $all_args.
sub complete_migration_storage {
    my ($cmd, $param, $current_value, $all_args) = @_;

    my $targetnode = $all_args->[1];

    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @usable = grep {
        PVE::Storage::storage_check_enabled($cfg, $_, $targetnode, 1)
            && $ids->{$_}->{content}->{images}
    } keys %$ids;

    return \@usable;
}
8583
# Return true if 'query-status' reports the VM as "paused".
# If the config check or the monitor query fails, the error is only warned about and a false
# value is returned.
sub vm_is_paused {
    my ($vmid) = @_;

    my $qmpstatus;
    eval {
        PVE::QemuConfig::assert_config_exists_on_node($vmid);
        $qmpstatus = mon_cmd($vmid, "query-status");
    };
    warn "$@\n" if $@;

    return $qmpstatus if !$qmpstatus;
    return $qmpstatus->{status} eq "paused";
}
8593
# Die unless the storage holding volume $vol has the volume's content type enabled.
# Returns 1 otherwise.
sub check_volume_storage_type {
    my ($storecfg, $vol) = @_;

    my ($storeid) = PVE::Storage::parse_volume_id($vol);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
    my $vtype = (PVE::Storage::parse_volname($storecfg, $vol))[0];

    if (!$scfg->{content}->{$vtype}) {
        die "storage '$storeid' does not support content-type '$vtype'\n";
    }

    return 1;
}
8606
# Add the static MAC address of every netX device of the VM to the forwarding DB of its bridge.
#
# NICs without a MAC or without a bridge are skipped (with a warning where applicable). With
# SDN available the SDN helper is used; otherwise the plain helper is only used if the bridge
# has a 'bridge' directory in sysfs.
sub add_nets_bridge_fdb {
    my ($conf, $vmid) = @_;

    for my $opt (keys %$conf) {
        $opt =~ m/^net(\d+)$/ or next;
        my $iface = "tap${vmid}i$1";

        # NOTE: expect setups with learning off to *not* use auto-random-generation of MAC on start
        my $net = parse_net($conf->{$opt}, 1) or next;

        my ($mac, $bridge) = $net->@{qw(macaddr bridge)};

        if (!$mac) {
            my $learning = file_read_firstline("/sys/class/net/$iface/brport/learning");
            log_warn("MAC learning disabled, but vNIC '$iface' has no static MAC to add to forwarding DB!")
                if !$learning;
            next;
        }

        if (!$bridge) {
            log_warn("Interface '$iface' not attached to any bridge.");
            next;
        }

        if ($have_sdn) {
            PVE::Network::SDN::Zones::add_bridge_fdb($iface, $mac, $bridge, $net->{firewall});
        } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
            PVE::Network::add_bridge_fdb($iface, $mac, $net->{firewall});
        }
    }
}
8635
# Remove the static MAC address of every netX device of the VM from the forwarding DB of its
# bridge.
#
# NICs that cannot be parsed, have no MAC or are not attached to a bridge are skipped;
# add_nets_bridge_fdb never adds an entry for those either. With SDN available the SDN helper is
# used; otherwise the plain helper is only used if the bridge has a 'bridge' directory in sysfs.
sub del_nets_bridge_fdb {
    my ($conf, $vmid) = @_;

    for my $opt (keys %$conf) {
        next if $opt !~ m/^net(\d+)$/;
        my $iface = "tap${vmid}i$1";

        my $net = parse_net($conf->{$opt}) or next;
        my $mac = $net->{macaddr} or next;

        # without a bridge there is no forwarding DB to clean up, and the code below must not
        # see an undefined bridge name
        my $bridge = $net->{bridge} or next;

        if ($have_sdn) {
            PVE::Network::SDN::Zones::del_bridge_fdb($iface, $mac, $bridge, $net->{firewall});
        } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
            PVE::Network::del_bridge_fdb($iface, $mac, $net->{firewall});
        }
    }
}
8654
8655 1;