]> git.proxmox.com Git - qemu-server.git/blob - PVE/QemuServer.pm
config: define machine schema as property-string
[qemu-server.git] / PVE / QemuServer.pm
1 package PVE::QemuServer;
2
3 use strict;
4 use warnings;
5
6 use Cwd 'abs_path';
7 use Digest::SHA;
8 use Fcntl ':flock';
9 use Fcntl;
10 use File::Basename;
11 use File::Copy qw(copy);
12 use File::Path;
13 use File::stat;
14 use Getopt::Long;
15 use IO::Dir;
16 use IO::File;
17 use IO::Handle;
18 use IO::Select;
19 use IO::Socket::UNIX;
20 use IPC::Open3;
21 use JSON;
22 use List::Util qw(first);
23 use MIME::Base64;
24 use POSIX;
25 use Storable qw(dclone);
26 use Time::HiRes qw(gettimeofday usleep);
27 use URI::Escape;
28 use UUID;
29
30 use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file);
31 use PVE::CGroup;
32 use PVE::CpuSet;
33 use PVE::DataCenterConfig;
34 use PVE::Exception qw(raise raise_param_exc);
35 use PVE::Format qw(render_duration render_bytes);
36 use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
37 use PVE::HA::Config;
38 use PVE::Mapping::PCI;
39 use PVE::Mapping::USB;
40 use PVE::INotify;
41 use PVE::JSONSchema qw(get_standard_option parse_property_string);
42 use PVE::ProcFSTools;
43 use PVE::PBSClient;
44 use PVE::RESTEnvironment qw(log_warn);
45 use PVE::RPCEnvironment;
46 use PVE::Storage;
47 use PVE::SysFSTools;
48 use PVE::Systemd;
49 use PVE::Tools qw(run_command file_read_firstline file_get_contents dir_glob_foreach get_host_arch $IPV6RE);
50
51 use PVE::QMPClient;
52 use PVE::QemuConfig;
53 use PVE::QemuServer::Helpers qw(config_aware_timeout min_version windows_version);
54 use PVE::QemuServer::Cloudinit;
55 use PVE::QemuServer::CGroup;
56 use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options get_cpu_bitness is_native_arch);
57 use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom drive_is_read_only parse_drive print_drive);
58 use PVE::QemuServer::Machine;
59 use PVE::QemuServer::Memory qw(get_current_memory);
60 use PVE::QemuServer::Monitor qw(mon_cmd);
61 use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
62 use PVE::QemuServer::QMPHelpers qw(qemu_deviceadd qemu_devicedel qemu_objectadd qemu_objectdel);
63 use PVE::QemuServer::USB;
64
# SDN support is optional: $have_sdn is 1 only if both SDN modules could be loaded,
# and stays undef otherwise.
my $have_sdn = eval {
    require PVE::Network::SDN::Zones;
    require PVE::Network::SDN::Vnets;
    1;
};
71
# Directory holding the EDK2 firmware images.
my $EDK2_FW_BASE = '/usr/share/pve-edk2-firmware/';

# Turn a (code image, vars image) pair of file names into paths below $EDK2_FW_BASE.
my $edk2_image_pair = sub {
    my ($code_image, $vars_image) = @_;
    return [ map { "$EDK2_FW_BASE/$_" } ($code_image, $vars_image) ];
};

# Firmware image pairs [ code, vars ], indexed by architecture and then by image type.
my $OVMF = {
    x86_64 => {
        '4m-no-smm' => $edk2_image_pair->('OVMF_CODE_4M.fd', 'OVMF_VARS_4M.fd'),
        '4m-no-smm-ms' => $edk2_image_pair->('OVMF_CODE_4M.fd', 'OVMF_VARS_4M.ms.fd'),
        '4m' => $edk2_image_pair->('OVMF_CODE_4M.secboot.fd', 'OVMF_VARS_4M.fd'),
        '4m-ms' => $edk2_image_pair->('OVMF_CODE_4M.secboot.fd', 'OVMF_VARS_4M.ms.fd'),
        # FIXME: These are legacy 2MB-sized images that modern OVMF no longer supports
        # building. How can we deprecate them sanely without breaking existing instances or
        # the use of older backups and snapshots?
        default => $edk2_image_pair->('OVMF_CODE.fd', 'OVMF_VARS.fd'),
    },
    aarch64 => {
        default => $edk2_image_pair->('AAVMF_CODE.fd', 'AAVMF_VARS.fd'),
    },
};
106
107 my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
108
109 # Note about locking: we use flock on the config file to protect against concurrent actions.
110 # Additionally, we have a 'lock' setting in the config file. This can be set to 'migrate',
111 # 'backup', 'snapshot' or 'rollback'. Most actions are not allowed when such a lock is set.
112 # But you can ignore this kind of lock with the --skiplock flag.
113
# Hook the config parser/writer pair up for everything below '/qemu-server/'.
cfs_register_file('/qemu-server/', \&parse_vm_config, \&write_vm_config);

# Standard option for the location some commands save state to or restore it from.
PVE::JSONSchema::register_standard_option(
    'pve-qm-stateuri',
    {
        type => 'string',
        optional => 1,
        maxLength => 128,
        description => "Some command save/restore state from this location.",
    },
);
126
# FIXME: remove in favor of just using the INotify one, it's cached there exactly the same way
my $nodename_cache;

# Return the node name, asking PVE::INotify only while no value has been cached yet.
sub nodename {
    if (!defined($nodename_cache)) {
        $nodename_cache = PVE::INotify::nodename();
    }
    return $nodename_cache;
}
133
# Property-string schema for the virtual watchdog device ('model' is the default key).
my $watchdog_fmt = {
    model => {
        type => 'string',
        optional => 1,
        default_key => 1,
        default => 'i6300esb',
        enum => ['i6300esb', 'ib700'],
        description => "Watchdog type to emulate.",
    },
    action => {
        type => 'string',
        optional => 1,
        enum => ['reset', 'shutdown', 'poweroff', 'pause', 'debug', 'none'],
        description => "The action to perform if after activation the guest fails to poll the watchdog in time.",
    },
};
150 PVE::JSONSchema::register_format('pve-qm-watchdog', $watchdog_fmt);
151
# Property-string schema for the 'agent' option ('enabled' is the default key).
my $agent_fmt = {
    enabled => {
        type => 'boolean',
        default_key => 1,
        default => 0,
        description => "Enable/disable communication with a QEMU Guest Agent (QGA) running in the VM.",
    },
    fstrim_cloned_disks => {
        type => 'boolean',
        optional => 1,
        default => 0,
        description => "Run fstrim after moving a disk or migrating the VM.",
    },
    'freeze-fs-on-backup' => {
        type => 'boolean',
        optional => 1,
        default => 1,
        description => "Freeze/thaw guest filesystems on backup for consistency.",
    },
    type => {
        type => 'string',
        optional => 1,
        default => 'virtio',
        enum => ['virtio', 'isa'],
        description => "Select the agent type",
    },
};
179
# Property-string schema for the 'vga' option ('type' is the default key).
my $vga_fmt = {
    type => {
        type => 'string',
        optional => 1,
        default_key => 1,
        default => 'std',
        enum => [
            qw(cirrus qxl qxl2 qxl3 qxl4 none),
            qw(serial0 serial1 serial2 serial3),
            qw(std virtio virtio-gl vmware),
        ],
        description => "Select the VGA type.",
    },
    memory => {
        type => 'integer',
        optional => 1,
        minimum => 4,
        maximum => 512,
        description => "Sets the VGA memory (in MiB). Has no effect with serial display.",
    },
    clipboard => {
        type => 'string',
        optional => 1,
        enum => ['vnc'],
        description => 'Enable a specific clipboard. If not set, depending on the display type the'
            .' SPICE one will be added. Migration with VNC clipboard is not yet supported!',
    },
};
204
# Property-string schema for the 'ivshmem' (inter-VM shared memory) option.
my $ivshmem_fmt = {
    name => {
        type => 'string',
        optional => 1,
        pattern => '[a-zA-Z0-9\-]+',
        format_description => 'string',
        description => "The name of the file. Will be prefixed with 'pve-shm-'. Default is the VMID. Will be deleted when the VM is stopped.",
    },
    size => {
        type => 'integer',
        minimum => 1,
        description => "The size of the file in MB.",
    },
};
219
# Property-string schema for the 'audio0' option.
my $audio_fmt = {
    device => {
        type => 'string',
        enum => ['ich9-intel-hda', 'intel-hda', 'AC97'],
        description => "Configure an audio device.",
    },
    driver => {
        type => 'string',
        optional => 1,
        default => 'spice',
        enum => [qw(spice none)],
        description => "Driver backend for the audio device.",
    },
};
234
# Property-string schema for the 'spice_enhancements' option.
my $spice_enhancements_fmt = {
    videostreaming => {
        type => 'string',
        optional => 1,
        default => 'off',
        enum => [qw(off all filter)],
        description => "Enable video streaming. Uses compression for detected video streams.",
    },
    foldersharing => {
        type => 'boolean',
        optional => 1,
        default => '0',
        description => "Enable folder sharing via SPICE. Needs Spice-WebDAV daemon installed in the VM.",
    },
};
250
# Property-string schema for the 'rng0' option ('source' is the default key).
my $rng_fmt = {
    source => {
        type => 'string',
        default_key => 1,
        enum => [qw(/dev/urandom /dev/random /dev/hwrng)],
        description => "The file on the host to gather entropy from. In most cases '/dev/urandom'"
            ." should be preferred over '/dev/random' to avoid entropy-starvation issues on the"
            ." host. Using urandom does *not* decrease security in any meaningful way, as it's"
            ." still seeded from real entropy, and the bytes provided will most likely be mixed"
            ." with real entropy on the guest as well. '/dev/hwrng' can be used to pass through"
            ." a hardware RNG from the host.",
    },
    max_bytes => {
        type => 'integer',
        optional => 1,
        # The default is 1 KiB/s: enough entropy for the guest to avoid boot-starvation issues
        # (e.g. systemd etc...), yet no chance of overwhelming the host, provided we're reading
        # from /dev/urandom.
        default => 1024,
        description => "Maximum bytes of entropy allowed to get injected into the guest every"
            ." 'period' milliseconds. Prefer a lower value when using '/dev/random' as source. Use"
            ." `0` to disable limiting (potentially dangerous!).",
    },
    period => {
        type => 'integer',
        optional => 1,
        default => 1000,
        description => "Every 'period' milliseconds the entropy-injection quota is reset, allowing"
            ." the guest to retrieve another 'max_bytes' of entropy.",
    },
};
283
# Property-string schema for the 'meta' option.
my $meta_info_fmt = {
    'creation-qemu' => {
        type => 'string',
        optional => 1,
        pattern => '\d+(\.\d+)+',
        description => "The QEMU (machine) version from the time this VM was created.",
    },
    ctime => {
        type => 'integer',
        optional => 1,
        minimum => 0,
        description => "The guest creation timestamp as UNIX epoch time",
    },
};
298
# Schema of the top-level VM configuration options. Each entry maps an option name to its
# JSON-schema style description. Further generated options (numaN, netN and the cloud-init
# settings) get added to this hash by code following it.
my $confdesc = {
    onboot => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether a VM will be started during system bootup.",
        default => 0,
    },
    autostart => {
        optional => 1,
        type => 'boolean',
        description => "Automatic restart after crash (currently ignored).",
        default => 0,
    },
    hotplug => {
        optional => 1,
        type => 'string', format => 'pve-hotplug-features',
        description => "Selectively enable hotplug features. This is a comma separated list of"
            ." hotplug features: 'network', 'disk', 'cpu', 'memory', 'usb' and 'cloudinit'. Use '0' to disable"
            ." hotplug completely. Using '1' as value is an alias for the default `network,disk,usb`."
            ." USB hotplugging is possible for guests with machine version >= 7.1 and ostype l26 or"
            ." windows > 7.",
        default => 'network,disk,usb',
    },
    reboot => {
        optional => 1,
        type => 'boolean',
        description => "Allow reboot. If set to '0' the VM exit on reboot.",
        default => 1,
    },
    lock => {
        optional => 1,
        type => 'string',
        description => "Lock/unlock the VM.",
        enum => [qw(backup clone create migrate rollback snapshot snapshot-delete suspending suspended)],
    },
    cpulimit => {
        optional => 1,
        type => 'number',
        description => "Limit of CPU usage.",
        verbose_description => "Limit of CPU usage.\n\nNOTE: If the computer has 2 CPUs, it has"
            ." total of '2' CPU time. Value '0' indicates no CPU limit.",
        minimum => 0,
        maximum => 128,
        default => 0,
    },
    cpuunits => {
        optional => 1,
        type => 'integer',
        description => "CPU weight for a VM, will be clamped to [1, 10000] in cgroup v2.",
        verbose_description => "CPU weight for a VM. Argument is used in the kernel fair scheduler."
            ." The larger the number is, the more CPU time this VM gets. Number is relative to"
            ." weights of all the other running VMs.",
        minimum => 1,
        maximum => 262144,
        default => 'cgroup v1: 1024, cgroup v2: 100',
    },
    memory => {
        optional => 1,
        type => 'string',
        description => "Memory properties.",
        format => $PVE::QemuServer::Memory::memory_fmt
    },
    balloon => {
        optional => 1,
        type => 'integer',
        # fixed typo: "ballon" -> "balloon"
        description => "Amount of target RAM for the VM in MiB. Using zero disables the balloon driver.",
        minimum => 0,
    },
    shares => {
        optional => 1,
        type => 'integer',
        description => "Amount of memory shares for auto-ballooning. The larger the number is, the"
            ." more memory this VM gets. Number is relative to weights of all other running VMs."
            ." Using zero disables auto-ballooning. Auto-ballooning is done by pvestatd.",
        minimum => 0,
        maximum => 50000,
        default => 1000,
    },
    keyboard => {
        optional => 1,
        type => 'string',
        description => "Keyboard layout for VNC server. This option is generally not required and"
            ." is often better handled from within the guest OS.",
        enum => PVE::Tools::kvmkeymaplist(),
        default => undef,
    },
    name => {
        optional => 1,
        type => 'string', format => 'dns-name',
        description => "Set a name for the VM. Only used on the configuration web interface.",
    },
    scsihw => {
        optional => 1,
        type => 'string',
        description => "SCSI controller model",
        enum => [qw(lsi lsi53c810 virtio-scsi-pci virtio-scsi-single megasas pvscsi)],
        default => 'lsi',
    },
    description => {
        optional => 1,
        type => 'string',
        description => "Description for the VM. Shown in the web-interface VM's summary."
            ." This is saved as comment inside the configuration file.",
        maxLength => 1024 * 8,
    },
    ostype => {
        optional => 1,
        type => 'string',
        # NOTE: When extending, also consider extending `%guest_types` in `Import/ESXi.pm`.
        enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 win10 win11 l24 l26 solaris)],
        description => "Specify guest operating system.",
        # fixed typo in the heredoc below: "OpenIndiania" -> "OpenIndiana"
        verbose_description => <<EODESC,
Specify guest operating system. This is used to enable special
optimization/features for specific operating systems:

[horizontal]
other;; unspecified OS
wxp;; Microsoft Windows XP
w2k;; Microsoft Windows 2000
w2k3;; Microsoft Windows 2003
w2k8;; Microsoft Windows 2008
wvista;; Microsoft Windows Vista
win7;; Microsoft Windows 7
win8;; Microsoft Windows 8/2012/2012r2
win10;; Microsoft Windows 10/2016/2019
win11;; Microsoft Windows 11/2022
l24;; Linux 2.4 Kernel
l26;; Linux 2.6 - 6.X Kernel
solaris;; Solaris/OpenSolaris/OpenIndiana kernel
EODESC
    },
    boot => {
        optional => 1,
        type => 'string', format => 'pve-qm-boot',
        description => "Specify guest boot order. Use the 'order=' sub-property as usage with no"
            ." key or 'legacy=' is deprecated.",
    },
    bootdisk => {
        optional => 1,
        type => 'string', format => 'pve-qm-bootdisk',
        description => "Enable booting from specified disk. Deprecated: Use 'boot: order=foo;bar' instead.",
        pattern => '(ide|sata|scsi|virtio)\d+',
    },
    smp => {
        optional => 1,
        type => 'integer',
        description => "The number of CPUs. Please use option -sockets instead.",
        minimum => 1,
        default => 1,
    },
    sockets => {
        optional => 1,
        type => 'integer',
        description => "The number of CPU sockets.",
        minimum => 1,
        default => 1,
    },
    cores => {
        optional => 1,
        type => 'integer',
        description => "The number of cores per socket.",
        minimum => 1,
        default => 1,
    },
    numa => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable NUMA.",
        default => 0,
    },
    hugepages => {
        optional => 1,
        type => 'string',
        description => "Enable/disable hugepages memory.",
        enum => [qw(any 2 1024)],
    },
    keephugepages => {
        optional => 1,
        type => 'boolean',
        default => 0,
        # fixed duplicated word: "will not not be deleted"
        description => "Use together with hugepages. If enabled, hugepages will not be deleted"
            ." after VM shutdown and can be used for subsequent starts.",
    },
    vcpus => {
        optional => 1,
        type => 'integer',
        description => "Number of hotplugged vcpus.",
        minimum => 1,
        # NOTE(review): the default lies below 'minimum'; presumably 0 means "not set" - confirm.
        default => 0,
    },
    acpi => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable ACPI.",
        default => 1,
    },
    agent => {
        optional => 1,
        description => "Enable/disable communication with the QEMU Guest Agent and its properties.",
        type => 'string',
        format => $agent_fmt,
    },
    kvm => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable KVM hardware virtualization.",
        default => 1,
    },
    tdf => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable time drift fix.",
        default => 0,
    },
    localtime => {
        optional => 1,
        type => 'boolean',
        description => "Set the real time clock (RTC) to local time. This is enabled by default if"
            ." the `ostype` indicates a Microsoft Windows OS.",
    },
    freeze => {
        optional => 1,
        type => 'boolean',
        description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
    },
    vga => {
        optional => 1,
        type => 'string', format => $vga_fmt,
        description => "Configure the VGA hardware.",
        verbose_description => "Configure the VGA Hardware. If you want to use high resolution"
            ." modes (>= 1280x1024x16) you may need to increase the vga memory option. Since QEMU"
            ." 2.9 the default VGA display type is 'std' for all OS types besides some Windows"
            ." versions (XP and older) which use 'cirrus'. The 'qxl' option enables the SPICE"
            ." display server. For win* OS you can select how many independent displays you want,"
            ." Linux guests can add displays them self.\nYou can also run without any graphic card,"
            ." using a serial device as terminal.",
    },
    watchdog => {
        optional => 1,
        type => 'string', format => 'pve-qm-watchdog',
        description => "Create a virtual hardware watchdog device.",
        verbose_description => "Create a virtual hardware watchdog device. Once enabled (by a guest"
            ." action), the watchdog must be periodically polled by an agent inside the guest or"
            ." else the watchdog will reset the guest (or execute the respective action specified)",
    },
    startdate => {
        optional => 1,
        type => 'string',
        typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
        # fixed: the two concatenated parts were missing the separating space ("are:'now'")
        description => "Set the initial date of the real time clock. Valid format for date are:"
            ." 'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
        pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
        default => 'now',
    },
    startup => get_standard_option('pve-startup-order'),
    template => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable Template.",
        default => 0,
    },
    args => {
        optional => 1,
        type => 'string',
        description => "Arbitrary arguments passed to kvm.",
        verbose_description => <<EODESCR,
Arbitrary arguments passed to kvm, for example:

args: -no-reboot -smbios 'type=0,vendor=FOO'

NOTE: this option is for experts only.
EODESCR
    },
    tablet => {
        optional => 1,
        type => 'boolean',
        default => 1,
        description => "Enable/disable the USB tablet device.",
        verbose_description => "Enable/disable the USB tablet device. This device is usually needed"
            ." to allow absolute mouse positioning with VNC. Else the mouse runs out of sync with"
            ." normal VNC clients. If you're running lots of console-only guests on one host, you"
            ." may consider disabling this to save some context switches. This is turned off by"
            ." default if you use spice (`qm set <vmid> --vga qxl`).",
    },
    migrate_speed => {
        optional => 1,
        type => 'integer',
        description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
        minimum => 0,
        default => 0,
    },
    migrate_downtime => {
        optional => 1,
        type => 'number',
        description => "Set maximum tolerated downtime (in seconds) for migrations.",
        minimum => 0,
        default => 0.1,
    },
    cdrom => {
        optional => 1,
        type => 'string', format => 'pve-qm-ide',
        typetext => '<volume>',
        description => "This is an alias for option -ide2",
    },
    cpu => {
        optional => 1,
        description => "Emulated CPU type.",
        type => 'string',
        format => 'pve-vm-cpu-conf',
    },
    parent => get_standard_option('pve-snapshot-name', {
        optional => 1,
        description => "Parent snapshot name. This is used internally, and should not be modified.",
    }),
    snaptime => {
        optional => 1,
        description => "Timestamp for snapshots.",
        type => 'integer',
        minimum => 0,
    },
    vmstate => {
        optional => 1,
        type => 'string', format => 'pve-volume-id',
        description => "Reference to a volume which stores the VM state. This is used internally"
            ." for snapshots.",
    },
    vmstatestorage => get_standard_option('pve-storage-id', {
        description => "Default storage for VM state volumes/files.",
        optional => 1,
    }),
    runningmachine => get_standard_option('pve-qemu-machine', {
        description => "Specifies the QEMU machine type of the running vm. This is used internally"
            ." for snapshots.",
    }),
    runningcpu => {
        description => "Specifies the QEMU '-cpu' parameter of the running vm. This is used"
            ." internally for snapshots.",
        optional => 1,
        type => 'string',
        pattern => $PVE::QemuServer::CPUConfig::qemu_cmdline_cpu_re,
        format_description => 'QEMU -cpu parameter'
    },
    machine => get_standard_option('pve-qemu-machine'),
    arch => {
        description => "Virtual processor architecture. Defaults to the host.",
        optional => 1,
        type => 'string',
        enum => [qw(x86_64 aarch64)],
    },
    smbios1 => {
        description => "Specify SMBIOS type 1 fields.",
        type => 'string', format => 'pve-qm-smbios1',
        maxLength => 512,
        optional => 1,
    },
    protection => {
        optional => 1,
        type => 'boolean',
        description => "Sets the protection flag of the VM. This will disable the remove VM and"
            ." remove disk operations.",
        default => 0,
    },
    bios => {
        optional => 1,
        type => 'string',
        enum => [ qw(seabios ovmf) ],
        description => "Select BIOS implementation.",
        default => 'seabios',
    },
    vmgenid => {
        type => 'string',
        pattern => '(?:[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}|[01])',
        format_description => 'UUID',
        description => "Set VM Generation ID. Use '1' to autogenerate on create or update, pass '0'"
            ." to disable explicitly.",
        verbose_description => "The VM generation ID (vmgenid) device exposes a 128-bit integer"
            ." value identifier to the guest OS. This allows to notify the guest operating system"
            ." when the virtual machine is executed with a different configuration (e.g. snapshot"
            ." execution or creation from a template). The guest operating system notices the"
            ." change, and is then able to react as appropriate by marking its copies of"
            ." distributed databases as dirty, re-initializing its random number generator, etc.\n"
            ."Note that auto-creation only works when done through API/CLI create or update methods"
            .", but not when manually editing the config file.",
        default => "1 (autogenerated)",
        optional => 1,
    },
    hookscript => {
        type => 'string',
        format => 'pve-volume-id',
        optional => 1,
        description => "Script that will be executed during various steps in the vms lifetime.",
    },
    ivshmem => {
        type => 'string',
        format => $ivshmem_fmt,
        description => "Inter-VM shared memory. Useful for direct communication between VMs, or to"
            ." the host.",
        optional => 1,
    },
    audio0 => {
        type => 'string',
        format => $audio_fmt,
        # fixed article: "a audio device" -> "an audio device"
        description => "Configure an audio device, useful in combination with QXL/Spice.",
        optional => 1
    },
    spice_enhancements => {
        type => 'string',
        format => $spice_enhancements_fmt,
        description => "Configure additional enhancements for SPICE.",
        optional => 1
    },
    tags => {
        type => 'string', format => 'pve-tag-list',
        description => 'Tags of the VM. This is only meta information.',
        optional => 1,
    },
    rng0 => {
        type => 'string',
        format => $rng_fmt,
        description => "Configure a VirtIO-based Random Number Generator.",
        optional => 1,
    },
    meta => {
        type => 'string',
        format => $meta_info_fmt,
        description => "Some (read-only) meta-information about this guest.",
        optional => 1,
    },
    affinity => {
        type => 'string', format => 'pve-cpuset',
        description => "List of host cores used to execute guest processes, for example: 0,5,8-11",
        optional => 1,
    },
};
733
# Property-string schema for the 'cicustom' option: each key names a volume holding a custom
# cloud-init file of that kind.
my $cicustom_fmt = {
    meta => {
        type => 'string',
        optional => 1,
        # The original text was one single-quoted string that swallowed a `"` and the `."` of
        # the intended concatenation, so those characters ended up inside the description.
        description => 'Specify a custom file containing all meta data passed to the VM via'
            .' cloud-init. This is provider specific meaning configdrive2 and nocloud differ.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    network => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all network data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    user => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all user data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    vendor => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all vendor data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
};
765 PVE::JSONSchema::register_format('pve-qm-cicustom', $cicustom_fmt);
766
# Schema of the cloud-init related VM configuration options.
# any new option might need to be added to $cloudinitoptions in PVE::API2::Qemu
my $confdesc_cloudinit = {
    citype => {
        optional => 1,
        type => 'string',
        # fixed: the parenthesis opened before `ostype` was never closed
        description => 'Specifies the cloud-init configuration format. The default depends on the'
            .' configured operating system type (`ostype`). We use the `nocloud` format for Linux,'
            .' and `configdrive2` for windows.',
        enum => ['configdrive2', 'nocloud', 'opennebula'],
    },
    ciuser => {
        optional => 1,
        type => 'string',
        description => "cloud-init: User name to change ssh keys and password for instead of the"
            ." image's configured default user.",
    },
    cipassword => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Password to assign the user. Using this is generally not'
            .' recommended. Use ssh keys instead. Also note that older cloud-init versions do not'
            .' support hashed passwords.',
    },
    ciupgrade => {
        optional => 1,
        type => 'boolean',
        description => 'cloud-init: do an automatic package upgrade after the first boot.',
        default => 1,
    },
    cicustom => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Specify custom files to replace the automatically generated'
            .' ones at start.',
        format => 'pve-qm-cicustom',
    },
    searchdomain => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Sets DNS search domains for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    nameserver => {
        optional => 1,
        type => 'string', format => 'address-list',
        description => 'cloud-init: Sets DNS server IP address for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    sshkeys => {
        optional => 1,
        type => 'string',
        format => 'urlencoded',
        description => "cloud-init: Setup public SSH keys (one key per line, OpenSSH format).",
    },
};
824
825 # what about other qemu settings ?
826 #cpu => 'string',
827 #machine => 'string',
828 #fda => 'file',
829 #fdb => 'file',
830 #mtdblock => 'file',
831 #sd => 'file',
832 #pflash => 'file',
833 #snapshot => 'bool',
834 #bootp => 'file',
835 ##tftp => 'dir',
836 ##smb => 'dir',
837 #kernel => 'file',
838 #append => 'string',
839 #initrd => 'file',
840 ##soundhw => 'string',
841
# Publish every option defined in $confdesc so far as standard option 'pve-qm-<name>'.
for my $opt (keys %$confdesc) {
    PVE::JSONSchema::register_standard_option("pve-qm-$opt", $confdesc->{$opt});
}

# Limits for indexed options; $MAX_NETS bounds the netN/ipconfigN entries generated below.
my $MAX_NETS = 32;
my $MAX_SERIAL_PORTS = 4;
my $MAX_PARALLEL_PORTS = 3;

# All numaN options share one and the same schema description.
for my $i (0 .. $PVE::QemuServer::Memory::MAX_NUMA - 1) {
    $confdesc->{"numa$i"} = $PVE::QemuServer::Memory::numadesc;
}
853
# NIC models accepted for the 'model' property of a network device.
my $nic_model_list = [
    qw(e1000 e1000-82540em e1000-82544gc e1000-82545em e1000e),
    qw(i82551 i82557b i82559er),
    qw(ne2k_isa ne2k_pci),
    qw(pcnet rtl8139 virtio vmxnet3),
];

# The same models, sorted and separated by single spaces.
my $nic_model_list_txt = join(' ', sort @{$nic_model_list});
871
# Description text for the 'bridge' property of a network device. The text contains nothing
# that interpolates, so a non-interpolating here-doc yields the very same string.
my $net_fmt_bridge_descr = <<'__EOD__';
Bridge to attach the network device to. The Proxmox VE standard bridge
is called 'vmbr0'.

If you do not specify a bridge, we create a kvm user (NATed) network
device, which provides DHCP and DNS services. The following addresses
are used:

10.0.2.2 Gateway
10.0.2.3 DNS Server
10.0.2.4 SMB Server

The DHCP server assign addresses to the guest starting from 10.0.2.15.
__EOD__
886
# Property-string schema for the netN options ('model' is the default key).
my $net_fmt = {
    macaddr => get_standard_option('mac-addr', {
        # fixed typo: "withing" -> "within"
        description => "MAC address. That address must be unique within your network. This is"
            ." automatically generated if not specified.",
    }),
    model => {
        type => 'string',
        description => "Network Card Model. The 'virtio' model provides the best performance with"
            ." very low CPU overhead. If your guest does not support this driver, it is usually"
            ." best to use 'e1000'.",
        enum => $nic_model_list,
        default_key => 1,
    },
    # Every model name is also accepted as a key that aliases 'model' and whose value ends up
    # in 'macaddr'. The parentheses make sure Perl parses the braces of map as a block.
    (map { ($_ => { keyAlias => 'model', alias => 'macaddr' }) } @$nic_model_list),
    bridge => get_standard_option('pve-bridge-id', {
        description => $net_fmt_bridge_descr,
        optional => 1,
    }),
    queues => {
        type => 'integer',
        minimum => 0, maximum => 64,
        description => 'Number of packet queues to be used on the device.',
        optional => 1,
    },
    rate => {
        type => 'number',
        minimum => 0,
        description => "Rate limit in mbps (megabytes per second) as floating point number.",
        optional => 1,
    },
    tag => {
        type => 'integer',
        minimum => 1, maximum => 4094,
        description => 'VLAN tag to apply to packets on this interface.',
        optional => 1,
    },
    trunks => {
        type => 'string',
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        description => 'VLAN trunks to pass through this interface.',
        format_description => 'vlanid[;vlanid...]',
        optional => 1,
    },
    firewall => {
        type => 'boolean',
        description => 'Whether this interface should be protected by the firewall.',
        optional => 1,
    },
    link_down => {
        type => 'boolean',
        description => 'Whether this interface should be disconnected (like pulling the plug).',
        optional => 1,
    },
    mtu => {
        type => 'integer',
        minimum => 1, maximum => 65520,
        description => "Force MTU, for VirtIO only. Set to '1' to use the bridge MTU",
        optional => 1,
    },
};
947
# Schema description shared by all netN options, also published as standard option
# 'pve-qm-net'.
my $netdesc = {
    type => 'string',
    format => $net_fmt,
    optional => 1,
    description => "Specify network devices.",
};

PVE::JSONSchema::register_standard_option('pve-qm-net', $netdesc);
955
# Property-string schema for the cloud-init ipconfigN options. A gateway requires the address
# property of the same IP family to be set.
my $ipconfig_fmt = {
    ip => {
        type => 'string',
        optional => 1,
        default => 'dhcp',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        description => 'IPv4 address in CIDR format.',
    },
    gw => {
        type => 'string',
        optional => 1,
        requires => 'ip',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        description => 'Default gateway for IPv4 traffic.',
    },
    ip6 => {
        type => 'string',
        optional => 1,
        default => 'dhcp',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        description => 'IPv6 address in CIDR format.',
    },
    gw6 => {
        type => 'string',
        optional => 1,
        requires => 'ip6',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        description => 'Default gateway for IPv6 traffic.',
    },
};
990 PVE::JSONSchema::register_format('pve-qm-ipconfig', $ipconfig_fmt);
# Schema entry shared by all cloud-init 'ipconfigN' config keys: an optional
# property string in the 'pve-qm-ipconfig' format.
my $ipconfigdesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-ipconfig',
    description => <<'EODESCR',
cloud-init: Specify IP addresses and gateways for the corresponding interface.

IP addresses use CIDR notation, gateways are optional but need an IP of the same type specified.

The special string 'dhcp' can be used for IP addresses to use DHCP, in which case no explicit
gateway should be provided.
For IPv6 the special string 'auto' can be used to use stateless autoconfiguration. This requires
cloud-init 19.4 or newer.

If cloud-init is enabled and neither an IPv4 nor an IPv6 address is specified, it defaults to using
dhcp on IPv4.
EODESCR
};
# Register the ipconfig description itself. The previous code passed $netdesc
# here, which published the network-device schema under the ipconfig name.
PVE::JSONSchema::register_standard_option("pve-qm-ipconfig", $ipconfigdesc);
1009
# Publish one 'netN' option and one cloud-init 'ipconfigN' option per possible
# network device index.
for my $idx (0 .. $MAX_NETS - 1) {
    $confdesc->{"net$idx"} = $netdesc;
    $confdesc_cloudinit->{"ipconfig$idx"} = $ipconfigdesc;
}

# Every cloud-init option is also a regular VM config option.
$confdesc->{$_} = $confdesc_cloudinit->{$_} for keys %$confdesc_cloudinit;
1018
PVE::JSONSchema::register_format('pve-cpuset', \&pve_verify_cpuset);

# Format verifier for 'pve-cpuset'.
#
# Parses $set_text with PVE::CpuSet::parse_cpuset and returns the short string
# form of the resulting set. If parsing fails it returns nothing when $noerr is
# set and dies with a generic message otherwise.
sub pve_verify_cpuset {
    my ($set_text, $noerr) = @_;

    # parse_cpuset yields (count, members); only the members are needed here
    my (undef, $cpu_ids) = eval { PVE::CpuSet::parse_cpuset($set_text) };

    if (my $err = $@) {
        die "unable to parse cpuset option\n" if !$noerr;
        return;
    }

    my $cpuset = PVE::CpuSet->new($cpu_ids);
    return $cpuset->short_string();
}
1032
PVE::JSONSchema::register_format('pve-volume-id-or-qm-path', \&verify_volume_id_or_qm_path);

# Format verifier for 'pve-volume-id-or-qm-path'.
#
# Accepts the keywords 'none' and 'cdrom' verbatim; anything else is delegated
# to verify_volume_id_or_absolute_path() (including the $noerr handling).
sub verify_volume_id_or_qm_path {
    my ($volid, $noerr) = @_;

    for my $keyword (qw(none cdrom)) {
        return $volid if $volid eq $keyword;
    }

    return verify_volume_id_or_absolute_path($volid, $noerr);
}
1041
PVE::JSONSchema::register_format('pve-volume-id-or-absolute-path', \&verify_volume_id_or_absolute_path);

# Format verifier for 'pve-volume-id-or-absolute-path'.
#
# A value starting with '/' is returned unchanged. Otherwise the value has to
# pass the 'pve-volume-id' format check, whose result is returned. On failure
# the check's error is re-thrown, or nothing is returned when $noerr is set.
sub verify_volume_id_or_absolute_path {
    my ($volid, $noerr) = @_;

    if ($volid =~ m|^/|) {
        return $volid;
    }

    my $checked = eval { PVE::JSONSchema::check_format('pve-volume-id', $volid, '') };
    if (my $err = $@) {
        return if $noerr;
        die $err;
    }

    return $checked;
}
1055
# Schema entry shared by all 'serialN' config keys: either a host device path
# below /dev or the keyword 'socket'.
my $serialdesc = {
    type => 'string',
    pattern => '(/dev/.+|socket)',
    optional => 1,
    description => "Create a serial device inside the VM (n is 0 to 3)",
    verbose_description => <<'EODESCR',
Create a serial device inside the VM (n is 0 to 3), and pass through a
host serial device (i.e. /dev/ttyS0), or create a unix socket on the
host side (use 'qm terminal' to open a terminal connection).

NOTE: If you pass through a host serial device, it is no longer possible to migrate such machines -
use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1072
# Schema entry shared by all 'parallelN' config keys: a host parallel port
# device node (/dev/parportN or /dev/usb/lpN).
my $paralleldesc = {
    type => 'string',
    pattern => '/dev/parport\d+|/dev/usb/lp\d+',
    optional => 1,
    description => "Map host parallel devices (n is 0 to 2).",
    verbose_description => <<'EODESCR',
Map host parallel devices (n is 0 to 2).

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1087
# Publish the indexed device options (parallelN, serialN, hostpciN, usbN) and
# all drive keys in the VM config schema.
$confdesc->{"parallel$_"} = $paralleldesc for 0 .. $MAX_PARALLEL_PORTS - 1;

$confdesc->{"serial$_"} = $serialdesc for 0 .. $MAX_SERIAL_PORTS - 1;

$confdesc->{"hostpci$_"} = $PVE::QemuServer::PCI::hostpcidesc
    for 0 .. $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES - 1;

# the drive module already provides one description per drive key
$confdesc->{$_} = $PVE::QemuServer::Drive::drivedesc_hash->{$_}
    for keys %{$PVE::QemuServer::Drive::drivedesc_hash};

$confdesc->{"usb$_"} = $PVE::QemuServer::USB::usbdesc
    for 0 .. $PVE::QemuServer::USB::MAX_USB_DEVICES - 1;
1107
# Property-string format of the 'boot' option: either the deprecated legacy
# letter list (the default key) or an explicit ordered device list.
my $boot_fmt = {
    legacy => {
        type => 'string',
        pattern => '[acdn]{1,4}',
        format_description => "[acdn]{1,4}",
        description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n)."
            ." Deprecated, use 'order=' instead.",
        default_key => 1,
        optional => 1,

        # note: this is also the fallback if boot: is not given at all
        default => 'cdn',
    },
    order => {
        type => 'string',
        format => 'pve-qm-bootdev-list',
        format_description => "device[;device...]",
        optional => 1,
        description => <<'EODESC',
The guest will attempt to boot from devices in the order they appear here.

Disks, optical drives and passed-through storage USB devices will be directly
booted from, NICs will load PXE, and PCIe devices will either behave like disks
(e.g. NVMe) or load an option ROM (e.g. RAID controller, hardware NIC).

Note that only devices in this list will be marked as bootable and thus loaded
by the guest firmware (BIOS/UEFI). If you require multiple disks for booting
(e.g. software-raid), you need to specify all of them here.

Overrides the deprecated 'legacy=[acdn]*' value when given.
EODESC
    },
};
PVE::JSONSchema::register_format('pve-qm-boot', $boot_fmt);
1142
PVE::JSONSchema::register_format('pve-qm-bootdev', \&verify_bootdev);

# Format verifier for a single boot device name.
#
# Valid are drive names (except efidisk*/tpmstate*) and netN/usbN/hostpciN keys
# that exist in the config schema. Returns $dev when valid; otherwise returns
# nothing if $noerr is set and dies if it is not.
sub verify_bootdev {
    my ($dev, $noerr) = @_;

    # EFI vars disks and TPM state are drives, but nothing one can boot from
    my $is_special = $dev =~ m/^efidisk/ || $dev =~ m/^tpmstate/;
    if (PVE::QemuServer::Drive::is_valid_drivename($dev)) {
        return $dev if !$is_special;
    }

    # a device name can match at most one of these prefixes
    for my $prefix (qw(net usb hostpci)) {
        next if $dev !~ m/^$prefix\d+$/;
        return $dev if $confdesc->{$dev};
    }

    return if $noerr;
    die "invalid boot device '$dev'\n";
}
1164
# Serialize a list of boot devices (array ref) into a 'boot' property string
# using the 'order' key. An empty list yields the empty string.
sub print_bootorder {
    my ($devs) = @_;

    if (!@$devs) {
        return "";
    }

    my $order = join(';', @$devs);
    return PVE::JSONSchema::print_property_string({ order => $order }, $boot_fmt);
}
1171
# Cached result of the KVM_GET_API_VERSION ioctl; 0 means "not queried yet".
my $kvm_api_version = 0;

# Return the KVM API version as reported by /dev/kvm. The value is cached once
# a true value was obtained. Returns nothing if /dev/kvm cannot be opened.
sub kvm_version {
    if (!$kvm_api_version) {
        open(my $kvm_fh, '<', '/dev/kvm') or return;

        # 0xae00 => KVM_GET_API_VERSION
        $kvm_api_version = ioctl($kvm_fh, 0xae00, 0);
        close($kvm_fh);
    }

    return $kvm_api_version;
}
1185
# Per-binary caches: detected version string and the binary's mtime at the
# time of detection.
my $kvm_user_version = {};
my $kvm_mtime = {};

# Return the version of the QEMU binary $binary (defaults to the binary for
# the host architecture) by parsing its '--version' output.
#
# The result is cached per binary and re-used as long as the binary's mtime is
# unchanged. If no version line is recognised the result is 'unknown'; errors
# from running the binary are only warned about.
sub kvm_user_version {
    my ($binary) = @_;

    if (!defined($binary)) {
        $binary = get_command_for_arch(get_host_arch()); # get the native arch by default
    }
    my $st = stat($binary);

    my $cached_version = $kvm_user_version->{$binary};
    my $cached_mtime = $kvm_mtime->{$binary} // -1;
    if ($cached_version && $cached_mtime == $st->mtime) {
        return $cached_version;
    }

    $kvm_user_version->{$binary} = 'unknown';
    $kvm_mtime->{$binary} = $st->mtime;

    # picks the version out of the "QEMU emulator version X.Y[.Z]" line
    my $parse_version_line = sub {
        my ($line) = @_;
        return if $line !~ m/^QEMU( PC)? emulator version (\d+\.\d+(\.\d+)?)(\.\d+)?[,\s]/;
        $kvm_user_version->{$binary} = $2;
    };

    eval { run_command([$binary, '--version'], outfunc => $parse_version_line) };
    if (my $err = $@) {
        warn $err;
    }

    return $kvm_user_version->{$binary};
}
# Thin wrapper around PVE::QemuServer::Machine::extract_version that falls back
# to the installed QEMU binary's version when $version is not given.
my sub extract_version {
    my ($machine_type, $version) = @_;

    $version //= kvm_user_version();

    return PVE::QemuServer::Machine::extract_version($machine_type, $version);
}
1220
# True if the vhost-net character device exists on this host.
sub kernel_has_vhost_net {
    my $vhost_dev = '/dev/vhost-net';
    return -c $vhost_dev;
}
1224
# True if $key is a known option in the VM config schema.
sub option_exists {
    my ($key) = @_;

    my $schema = $confdesc->{$key};
    return defined($schema);
}
1229
# Cached path of the host's physical CD-ROM device ('' if there is none).
my $cdrom_path;

# Return the first of /dev/cdrom, /dev/cdrom1, /dev/cdrom2 that is a symlink.
# If none is, a warning is logged and the empty string is returned. The result
# is computed once and cached.
sub get_cdrom_path {
    if (!defined($cdrom_path)) {
        for my $candidate (map { "/dev/cdrom$_" } ('', '1', '2')) {
            next if !-l $candidate;
            $cdrom_path = $candidate;
            last;
        }

        if (!defined($cdrom_path)) {
            log_warn("no physical CD-ROM available, ignoring");
            $cdrom_path = '';
        }
    }

    return $cdrom_path;
}
1244
# Resolve the 'file' value of a CD-ROM drive to a path:
#   'cdrom'        -> the host's physical CD-ROM device (see get_cdrom_path)
#   'none'         -> empty string
#   absolute path  -> returned as is
#   anything else  -> treated as volume ID and resolved via PVE::Storage::path
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';
    return '' if $cdrom eq 'none';
    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path($storecfg, $cdrom);
}
1258
# Try to convert old style file names to volume IDs.
#
# 'none', 'cdrom', /dev/ paths and anything that already looks like
# 'storage:volume' are returned unchanged. A bare file name is mapped onto the
# 'local' storage (iso/ for cdrom media, the VM's directory otherwise). Any
# other value containing a slash cannot be converted and yields nothing.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    my $needs_no_conversion = $file eq 'none'
        || $file eq 'cdrom'
        || $file =~ m|^/dev/.+|
        || $file =~ m/^([^:]+):(.+)$/;
    return $file if $needs_no_conversion;

    return if $file =~ m|/|;

    my $is_cdrom = $media && $media eq 'cdrom';
    return $is_cdrom ? "local:iso/$file" : "local:$vmid/$file";
}
1277
# Check that the storage content type $vtype fits the drive's $media setting
# ('disk' needs 'images', 'cdrom' needs 'iso'). Nothing is checked when $media
# is not set. A mismatch raises a parameter exception for option $opt; an
# unknown media value is an internal error.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my %expected_vtype_for = (
        disk => 'images',
        cdrom => 'iso',
    );
    my $etype = $expected_vtype_for{$media} // die "internal error";

    return if ($vtype eq $etype);

    raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
}
1296
# Normalize $drive->{file} in place: a plain filesystem path is converted to
# the volume ID of the storage it belongs to (raising a parameter exception for
# $opt if no storage matches). 'cdrom'/'none', /dev/ paths, 'storage:volume'
# values and purely numeric values are left untouched. The media type is set to
# 'cdrom' for ISO volumes and for 'cdrom'/'none' when not set explicitly.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    my $file = $drive->{file};

    # try to convert filesystem paths to volume IDs
    my $is_plain_path = $file !~ m/^(cdrom|none)$/
        && $file !~ m|^/dev/.+|
        && $file !~ m/^([^:]+):(.+)$/
        && $file !~ m/^\d+$/;

    if ($is_plain_path) {
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $file);
        if (!$vtype) {
            raise_param_exc({ $opt => "unable to associate path '$drive->{file}' to any storage"});
        }
        if (!$drive->{media} && $vtype eq 'iso') {
            $drive->{media} = 'cdrom';
        }
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    $drive->{media} = 'cdrom' if !$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/;
}
1316
# Parse the 'hotplug' option value into a hash ref { feature => 1, ... }.
#
# '0' disables everything (empty hash), '1' selects the schema default.
# Otherwise the value is a list of feature names; an unknown name is fatal.
sub parse_hotplug_features {
    my ($data) = @_;

    my $enabled = {};

    return $enabled if $data eq '0';

    if ($data eq '1') {
        $data = $confdesc->{hotplug}->{default};
    }

    for my $feature (PVE::Tools::split_list($data)) {
        $feature =~ m/^(network|disk|cpu|memory|usb|cloudinit)$/
            or die "invalid hotplug feature '$feature'\n";
        $enabled->{$1} = 1;
    }

    return $enabled;
}
1335
PVE::JSONSchema::register_format('pve-hotplug-features', \&pve_verify_hotplug_features);

# Format verifier for 'pve-hotplug-features'.
#
# Returns $value if parse_hotplug_features() accepts it. On a parse error it
# returns nothing when $noerr is set and re-throws the parser's error
# otherwise.
#
# parse_hotplug_features() reports invalid input by dying and otherwise always
# returns a hash ref, so the call has to be wrapped in eval for $noerr to have
# any effect.
sub pve_verify_hotplug_features {
    my ($value, $noerr) = @_;

    eval { parse_hotplug_features($value) };
    if (my $err = $@) {
        return if $noerr;
        die $err;
    }

    return $value;
}
1346
# Die if the parsed vga option requests the VNC clipboard together with a
# display type that does not start with one of std, cirrus, vmware, virtio or
# qxl. Nothing is checked when no type is set.
sub assert_clipboard_config {
    my ($vga) = @_;

    my $clipboard = $vga->{'clipboard'};
    return if !$clipboard || $clipboard ne 'vnc';

    my $type = $vga->{type};
    return if !$type;

    my $clipboard_regex = qr/^(std|cirrus|vmware|virtio|qxl)/;
    return if $type =~ $clipboard_regex;

    die "vga type $vga->{type} is not compatible with VNC clipboard\n";
}
1361
# Return the '-device' string for the USB tablet. It is attached to the EHCI
# bus on q35 machines and on aarch64, and to the UHCI bus otherwise.
sub print_tabletdevice_full {
    my ($conf, $arch) = @_;

    my $is_q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # we use uhci for old VMs because tablet driver was buggy in older qemu
    my $usbbus = ($is_q35 || $arch eq 'aarch64') ? 'ehci' : 'uhci';

    return "usb-tablet,id=tablet,bus=$usbbus.0,port=1";
}
1377
# Return the '-device' string for a USB keyboard. Only aarch64 guests get one;
# for every other architecture nothing is returned.
sub print_keyboarddevice_full {
    my ($conf, $arch) = @_;

    if ($arch eq 'aarch64') {
        return "usb-kbd,id=keyboard,bus=ehci.0,port=2";
    }

    return;
}
1385
# Config key of a parsed drive: interface name directly followed by its index
# (e.g. 'scsi0').
my sub get_drive_id {
    my ($drive) = @_;
    return $drive->{interface} . $drive->{index};
}
1390
# Build the QEMU '-device' argument for a parsed drive.
#
# The device model and bus addressing depend on the drive's interface:
#   virtio    -> virtio-blk-pci on the PCI address from print_pci_addr()
#   scsi      -> scsi-<type> on the controller reported by scsihw_infos()
#   ide, sata -> ide-cd / ide-hd on an IDE or AHCI bus
# Any other interface is fatal. 'bootindex' and 'serial' are appended for all
# interfaces when set on the drive.
sub print_drivedevice_full {
    my ($storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type) = @_;

    my $interface = $drive->{interface};
    my $drive_id = get_drive_id($drive);
    my $drive_ref = "drive=drive-$drive_id";

    # comma separated properties of the device, the device model comes first
    my @props;

    if ($interface eq 'virtio') {
        # note: $pciaddr brings its own leading comma
        my $pciaddr = print_pci_addr("$drive_id", $bridges, $arch, $machine_type);
        push @props, 'virtio-blk-pci', $drive_ref, "id=${drive_id}${pciaddr}";
        push @props, "iothread=iothread-$drive_id" if $drive->{iothread};
    } elsif ($interface eq 'scsi') {
        my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
        my $unit = $drive->{index} % $maxdev;

        my $machine_version = extract_version($machine_type, kvm_user_version());
        my $device_type = PVE::QemuServer::Drive::get_scsi_device_type(
            $drive, $storecfg, $machine_version);

        push @props, "scsi-$device_type", "bus=$controller_prefix$controller.0";

        my $scsihw = $conf->{scsihw};
        if (!$scsihw || $scsihw =~ m/^lsi/ || $scsihw eq 'pvscsi') {
            # addressed by SCSI ID on the controller
            push @props, "scsi-id=$unit";
        } else {
            # addressed by LUN, using the drive index
            push @props, 'channel=0', 'scsi-id=0', "lun=$drive->{index}";
        }
        push @props, $drive_ref, "id=$drive_id";

        my $is_disk_type = $device_type eq 'block' || $device_type eq 'hd';
        push @props, 'rotation_rate=1' if $drive->{ssd} && $is_disk_type;
        push @props, "wwn=$drive->{wwn}" if $drive->{wwn};

        # only scsi-hd and scsi-cd support passing vendor and product information
        if ($device_type eq 'hd' || $device_type eq 'cd') {
            for my $info (qw(vendor product)) {
                my $value = $drive->{$info};
                push @props, "$info=$value" if $value;
            }
        }
    } elsif ($interface eq 'ide' || $interface eq 'sata') {
        my $maxdev = ($interface eq 'sata') ? $PVE::QemuServer::Drive::MAX_SATA_DISKS : 2;
        my $controller = int($drive->{index} / $maxdev);
        my $unit = $drive->{index} % $maxdev;

        # machine type q35 only supports unit=0 for IDE rather than 2 units. This wasn't handled
        # correctly before, so e.g. index=2 was mapped to controller=1,unit=0 rather than
        # controller=2,unit=0. Note that odd indices never worked, as they would be mapped to
        # unit=1, so to keep backwards compat for migration, it suffices to keep even ones as they
        # were before. Move odd ones up by 2 where they don't clash.
        if (PVE::QemuServer::Machine::machine_type_is_q35($conf) && $interface eq 'ide') {
            $controller += 2 * ($unit % 2);
            $unit = 0;
        }

        my $is_cdrom = $drive->{media} && $drive->{media} eq 'cdrom';
        my $device_type = $is_cdrom ? "cd" : "hd";

        my $bus = $interface eq 'ide'
            ? "bus=ide.$controller,unit=$unit"
            : "bus=ahci$controller.$unit";
        push @props, "ide-$device_type", $bus, $drive_ref, "id=$drive_id";

        if ($device_type eq 'hd') {
            if (my $model = $drive->{model}) {
                $model = URI::Escape::uri_unescape($model);
                push @props, "model=$model";
            }
            push @props, 'rotation_rate=1' if $drive->{ssd};
        }
        push @props, "wwn=$drive->{wwn}" if $drive->{wwn};
    } elsif ($interface eq 'usb') {
        die "implement me";
        # -device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0
    } else {
        die "unsupported interface type";
    }

    push @props, "bootindex=$drive->{bootindex}" if $drive->{bootindex};

    if (my $serial = $drive->{serial}) {
        $serial = URI::Escape::uri_unescape($serial);
        push @props, "serial=$serial";
    }

    return join(',', @props);
}
1486
# Return the value of the first 'InitiatorName=' line found in
# /etc/iscsi/initiatorname.iscsi. Returns nothing if the file cannot be opened
# and undef if it contains no such line.
sub get_initiator_name {
    my $fh = IO::File->new('/etc/iscsi/initiatorname.iscsi');
    return if !$fh;

    my $initiator;
    while (defined(my $line = readline($fh))) {
        if ($line =~ m/^\s*InitiatorName\s*=\s*([\.\-:\w]+)/) {
            $initiator = $1;
            last;
        }
    }
    $fh->close();

    return $initiator;
}
1500
# Decide whether io_uring may be used as default AIO mode for a drive on the
# storage described by $scfg (may be undef). Returns 1 if so, nothing if not.
my sub storage_allows_io_uring_default {
    my ($scfg, $cache_direct) = @_;

    if ($scfg) {
        my $type = $scfg->{type};

        # io_uring with cache mode writeback or writethrough on krbd will hang...
        return if $type eq 'rbd' && $scfg->{krbd} && !$cache_direct;

        # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
        # sometimes, just plain disable...
        return if $type eq 'lvm';

        # io_uring causes problems when used with CIFS since kernel 5.15
        # Some discussion: https://www.spinics.net/lists/linux-cifs/msg26734.html
        return if $type eq 'cifs';
    }

    return 1;
}
1517
# Tell whether a drive's cache mode bypasses the host page cache.
#
# With an explicit cache mode this is the case for 'off', 'none' and
# 'directsync'. Without one, the answer is yes unless the drive is a CD-ROM or
# lives on a btrfs storage that does not have 'nocow' set.
my sub drive_uses_cache_direct {
    my ($drive, $scfg) = @_;

    if (my $cache = $drive->{cache}) {
        return scalar($cache =~ /^(?:off|none|directsync)$/);
    }

    return 0 if drive_is_cdrom($drive);

    my $is_cow_btrfs = $scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow};
    return $is_cow_btrfs ? 0 : 1;
}
1531
# Build the QEMU '-drive' argument for a parsed drive.
#
# $live_restore_name, when set, is the name of the block node the drive is
# restored from: the drive is then wrapped in an 'alloc-track' node backed by
# it. $io_uring tells whether io_uring may be picked as default AIO mode.
sub print_drive_commandline_full {
    my ($storecfg, $vmid, $drive, $live_restore_name, $io_uring) = @_;

    my $volid = $drive->{file};
    my $format = $drive->{format};
    my $drive_id = get_drive_id($drive);

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
    my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;

    my $path;
    if (drive_is_cdrom($drive)) {
        $path = get_iso_path($storecfg, $vmid, $volid);
        die "$drive_id: cannot back cdrom drive with a live restore image\n" if $live_restore_name;
    } elsif ($storeid) {
        $path = PVE::Storage::path($storecfg, $volid);
        $format //= qemu_img_format($scfg, $volname);
    } else {
        $path = $volid;
        $format //= "raw";
    }

    my $is_rbd = $path =~ m/^rbd:/;

    # 'key=value' options in output order, each gets a leading comma at the end
    my @opts;

    # options that are passed through to QEMU under the same name
    for my $name (qw(heads secs cyls trans media cache rerror werror aio discard)) {
        my $value = $drive->{$name};
        push @opts, "$name=$value" if defined($value);
    }

    # snapshot only accepts on|off
    if (defined($drive->{snapshot})) {
        push @opts, 'snapshot=' . ($drive->{snapshot} ? 'on' : 'off');
    }

    if (defined($drive->{ro})) { # ro maps to QEMUs `readonly`, which accepts `on` or `off` only
        push @opts, 'readonly=' . ($drive->{ro} ? 'on' : 'off');
    }

    # throttling: one group of limits for total, read and write each
    for my $group (['', '-total'], ['_rd', '-read'], ['_wr', '-write']) {
        my ($dir, $qmpname) = @$group;

        # [drive key, QEMU option suffix, value is given in MiB/s]
        my @limits = (
            ["mbps$dir", "bps$qmpname", 1],
            ["mbps${dir}_max", "bps$qmpname-max", 1],
            ["bps${dir}_max_length", "bps$qmpname-max-length", 0],
            ["iops${dir}", "iops$qmpname", 0],
            ["iops${dir}_max", "iops$qmpname-max", 0],
            ["iops${dir}_max_length", "iops$qmpname-max-length", 0],
        );
        for my $limit (@limits) {
            my ($key, $qemu_name, $is_mib) = @$limit;
            my $v = $drive->{$key} or next;
            $v = int($v*1024*1024) if $is_mib;
            push @opts, "throttling.$qemu_name=$v";
        }
    }

    if ($live_restore_name) {
        $format = "rbd" if $is_rbd;
        die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n"
            if !$format;
        push @opts, 'format=alloc-track', "file.driver=$format";
    } elsif ($format) {
        push @opts, "format=$format";
    }

    my $cache_direct = drive_uses_cache_direct($drive, $scfg);

    push @opts, 'cache=none' if !$drive->{cache} && $cache_direct;

    if (!$drive->{aio}) {
        my $aio;
        if ($io_uring && storage_allows_io_uring_default($scfg, $cache_direct)) {
            # io_uring supports all cache modes
            $aio = 'io_uring';
        } elsif ($cache_direct) {
            # aio native works only with O_DIRECT
            $aio = 'native';
        } else {
            $aio = 'threads';
        }
        push @opts, "aio=$aio";
    }

    if (!drive_is_cdrom($drive)) {
        my $detectzeroes;
        if (defined($drive->{detect_zeroes}) && !$drive->{detect_zeroes}) {
            $detectzeroes = 'off';
        } elsif ($drive->{discard}) {
            $detectzeroes = $drive->{discard} eq 'on' ? 'unmap' : 'on';
        } else {
            # This used to be our default with discard not being specified:
            $detectzeroes = 'on';
        }

        # note: 'detect-zeroes' works per blockdev and we want it to persist
        # after the alloc-track is removed, so put it on 'file' directly
        my $dz_param = $live_restore_name ? "file.detect-zeroes" : "detect-zeroes";
        push @opts, "$dz_param=$detectzeroes" if $detectzeroes;
    }

    if ($live_restore_name) {
        push @opts, "backing=$live_restore_name", 'auto-remove=on';
    }

    my $file_param = "file";
    if ($live_restore_name) {
        # non-rbd drivers require the underlying file to be a separate block
        # node, so add a second .file indirection
        $file_param .= ".file" if !$is_rbd;
        $file_param .= ".filename";
    }
    my $pathinfo = $path ? "$file_param=$path," : '';

    my $opts = join('', map { ",$_" } @opts);

    return "${pathinfo}if=none,id=drive-$drive_id$opts";
}
1657
# Build the '-blockdev' argument for a read-only 'pbs' driver node named
# $pbs_name from the connection details in $pbs_conf. 'namespace' and
# 'keyfile' are only included when set.
sub print_pbs_blockdev {
    my ($pbs_conf, $pbs_name) = @_;

    my @props = (
        'driver=pbs',
        "node-name=$pbs_name",
        'read-only=on',
        "repository=$pbs_conf->{repository}",
    );
    push @props, "namespace=$pbs_conf->{namespace}" if $pbs_conf->{namespace};
    push @props, "snapshot=$pbs_conf->{snapshot}", "archive=$pbs_conf->{archive}";
    push @props, "keyfile=$pbs_conf->{keyfile}" if $pbs_conf->{keyfile};

    return join(',', @props);
}
1668
# Build the QEMU '-device' argument for the guest side of network device
# $netid, described by the parsed net option $net.
#
# Multi-queue, queue sizes and host_mtu are only set up for VirtIO NICs. With
# $use_old_bios_files the matching legacy PXE ROM file is selected explicitly.
sub print_netdevice_full {
    my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version) = @_;

    my $is_virtio = $net->{model} eq 'virtio';
    my $device = $is_virtio ? 'virtio-net-pci' : $net->{model};

    my $pciaddr = print_pci_addr("$netid", $bridges, $arch, $machine_type);
    my $tmpstr = "$device,mac=$net->{macaddr},netdev=$netid$pciaddr,id=$netid";

    my $queues = $net->{queues};
    if ($queues && $queues > 1 && $is_virtio) {
        # Consider we have N queues, the number of vectors needed is 2 * N + 2, i.e., one per in
        # and out of each queue plus one config interrupt and control vector queue
        my $vectors = $queues * 2 + 2;
        $tmpstr .= ",vectors=$vectors,mq=on";
        $tmpstr .= ",packed=on" if min_version($machine_version, 7, 1);
    }

    if (min_version($machine_version, 7, 1) && $is_virtio) {
        $tmpstr .= ",rx_queue_size=1024,tx_queue_size=256";
    }

    if ($net->{bootindex}) {
        $tmpstr .= ",bootindex=$net->{bootindex}";
    }

    if (my $mtu = $net->{mtu}) {
        if ($is_virtio && $net->{bridge}) {
            my $bridge_mtu = PVE::Network::read_bridge_mtu($net->{bridge});
            if ($mtu == 1) {
                # the special value 1 means "use the MTU of the bridge"
                $mtu = $bridge_mtu;
            } else {
                die "netdev $netid: MTU '$mtu' is smaller than the IP minimum MTU '576'\n"
                    if $mtu < 576;
                die "netdev $netid: MTU '$mtu' is bigger than the bridge MTU '$bridge_mtu'\n"
                    if $mtu > $bridge_mtu;
            }
            $tmpstr .= ",host_mtu=$mtu";
        } else {
            warn "WARN: netdev $netid: ignoring MTU '$mtu', not using VirtIO or no bridge configured.\n";
        }
    }

    if ($use_old_bios_files) {
        my %pxe_rom_for = (
            'virtio-net-pci' => 'pxe-virtio.rom',
            'e1000' => 'pxe-e1000.rom',
            'e1000e' => 'pxe-e1000e.rom',
            'ne2k' => 'pxe-ne2k_pci.rom',
            'pcnet' => 'pxe-pcnet.rom',
            'rtl8139' => 'pxe-rtl8139.rom',
        );
        my $romfile = $pxe_rom_for{$device};
        $tmpstr .= ",romfile=$romfile" if $romfile;
    }

    return $tmpstr;
}
1731
# Build the QEMU '-netdev' argument for the host side of network device $netid
# (must be of the form 'net<N>' with N below $MAX_NETS).
#
# With a bridge configured a tap device named tap<vmid>i<N> is used, together
# with the pve-bridge (or pve-bridge-hotplug when $hotplug is set) up script;
# otherwise user mode networking is used. Dies on an invalid $netid or when the
# interface name would get too long.
sub print_netdev_full {
    my ($vmid, $conf, $arch, $net, $netid, $hotplug) = @_;

    # Reject ids that are not 'net<N>' outright. Previously such an id left the
    # index empty, which slipped through the range check below and produced an
    # interface name without index.
    die "got strange net id '$netid'\n" if $netid !~ m/^net(\d+)$/;
    my $i = int($1);

    die "got strange net id '$i'\n" if $i >= ${MAX_NETS};

    my $ifname = "tap${vmid}i$i";

    # kvm uses TUNSETIFF ioctl, and that limits ifname length
    die "interface name '$ifname' is too long (max 15 character)\n"
        if length($ifname) >= 16;

    my $vhostparam = '';
    if (is_native_arch($arch)) {
        $vhostparam = ',vhost=on' if kernel_has_vhost_net() && $net->{model} eq 'virtio';
    }

    my $vmname = $conf->{name} || "vm$vmid";

    my $netdev = "";
    my $script = $hotplug ? "pve-bridge-hotplug" : "pve-bridge";

    if ($net->{bridge}) {
        $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/$script"
            .",downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam";
    } else {
        $netdev = "type=user,id=$netid,hostname=$vmname";
    }

    # multi-queue is only configured for VirtIO NICs
    $netdev .= ",queues=$net->{queues}" if ($net->{queues} && $net->{model} eq 'virtio');

    return $netdev;
}
1769
# Maps the 'type' of the parsed vga option to the QEMU display device model.
my $vga_map = {
    'cirrus' => 'cirrus-vga',
    'std' => 'VGA',
    'vmware' => 'vmware-svga',
    'virtio' => 'virtio-vga',
    'virtio-gl' => 'virtio-vga-gl',
};

# Build the QEMU '-device' argument for a display adapter.
#
# $vga is the parsed vga option. $qxlnum, when true, selects a QXL device and
# $id is the index of the display (false for the first one). Dies if no device
# model is known for the configured type, or if the host lacks the libraries or
# the DRM render node needed for 'virtio-gl'.
sub print_vga_device {
    my ($conf, $vga, $arch, $machine_version, $machine, $id, $qxlnum, $bridges) = @_;

    my $type = $vga_map->{$vga->{type}};
    if ($arch eq 'aarch64' && defined($type) && $type eq 'virtio-vga') {
        $type = 'virtio-gpu';
    }
    my $vgamem_mb = $vga->{memory};

    my $max_outputs = '';
    if ($qxlnum) {
        $type = $id ? 'qxl' : 'qxl-vga';

        # Both anchors have to apply to the whole alternation. The previous
        # pattern /^(?:l\d\d)|(?:other)$/ anchored each alternative on one side
        # only and thus also matched e.g. any value merely ending in 'other'.
        if (!$conf->{ostype} || $conf->{ostype} =~ m/^(?:l\d\d|other)$/) {
            # set max outputs so linux can have up to 4 qxl displays with one device
            if (min_version($machine_version, 4, 1)) {
                $max_outputs = ",max_outputs=4";
            }
        }
    }

    die "no device-type for $vga->{type}\n" if !$type;

    my $memory = "";
    if ($vgamem_mb) {
        if ($vga->{type} =~ /^virtio/) {
            my $bytes = PVE::Tools::convert_size($vgamem_mb, "mb" => "b");
            $memory = ",max_hostmem=$bytes";
        } elsif ($qxlnum) {
            # from https://www.spice-space.org/multiple-monitors.html
            $memory = ",vgamem_mb=$vga->{memory}";
            my $ram = $vgamem_mb * 4;
            my $vram = $vgamem_mb * 2;
            $memory .= ",ram_size_mb=$ram,vram_size_mb=$vram";
        } else {
            $memory = ",vgamem_mb=$vga->{memory}";
        }
    } elsif ($qxlnum && $id) {
        # fixed sizes for additional QXL displays without configured memory
        $memory = ",ram_size=67108864,vram_size=33554432";
    }

    my $edidoff = "";
    if ($type eq 'VGA' && windows_version($conf->{ostype})) {
        $edidoff=",edid=off" if (!defined($conf->{bios}) || $conf->{bios} ne 'ovmf');
    }

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $vgaid = "vga" . ($id // '');
    my $pciaddr;
    if ($q35 && $vgaid eq 'vga') {
        # the first display uses pcie.0 bus on q35 machines
        $pciaddr = print_pcie_addr($vgaid, $bridges, $arch, $machine);
    } else {
        $pciaddr = print_pci_addr($vgaid, $bridges, $arch, $machine);
    }

    if ($vga->{type} eq 'virtio-gl') {
        my $base = '/usr/lib/x86_64-linux-gnu/lib';
        die "missing libraries for '$vga->{type}' detected! Please install 'libgl1' and 'libegl1'\n"
            if !-e "${base}EGL.so.1" || !-e "${base}GL.so.1";

        die "no DRM render node detected (/dev/dri/renderD*), no GPU? - needed for '$vga->{type}' display\n"
            if !PVE::Tools::dir_glob_regex('/dev/dri/', "renderD.*");
    }

    return "$type,id=${vgaid}${memory}${max_outputs}${pciaddr}${edidoff}";
}
1845
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
#
# Parse a 'netN' property string. On a parse error the error is warned about
# and nothing is returned. If the value carries no MAC address, a random one
# (using the prefix from datacenter.cfg) is filled in, unless
# $disable_mac_autogen is set.
sub parse_net {
    my ($data, $disable_mac_autogen) = @_;

    my $net = eval { parse_property_string($net_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    return $net if defined($net->{macaddr}) || $disable_mac_autogen;

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
    $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix});

    return $net;
}
1861
# ipconfigX ip=cidr,gw=ip,ip6=cidr,gw6=ip
#
# Parse a cloud-init 'ipconfigN' property string. Parse errors and
# inconsistent gateway settings are warned about and yield nothing. If neither
# an IPv4 nor an IPv6 address is given, both are reported as 'dhcp'.
sub parse_ipconfig {
    my ($data) = @_;

    my $ipconfig = eval { parse_property_string($ipconfig_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    my $gw = $ipconfig->{gw};
    my $gw6 = $ipconfig->{gw6};

    # a gateway is only meaningful together with a static address of its family
    if ($gw && !$ipconfig->{ip}) {
        warn 'gateway specified without specifying an IP address';
        return;
    }
    if ($gw6 && !$ipconfig->{ip6}) {
        warn 'IPv6 gateway specified without specifying an IPv6 address';
        return;
    }
    if ($gw && $ipconfig->{ip} eq 'dhcp') {
        warn 'gateway specified together with DHCP';
        return;
    }
    if ($gw6 && $ipconfig->{ip6} !~ /^$IPV6RE/) {
        # gw6 + auto/dhcp
        warn "IPv6 gateway specified together with $ipconfig->{ip6} address";
        return;
    }

    my $has_address = $ipconfig->{ip} || $ipconfig->{ip6};
    return $has_address ? $ipconfig : { ip => 'dhcp', ip6 => 'dhcp' };
}
1896
# Serialize a parsed net option back into its property string.
sub print_net {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $net_fmt);
}
1902
# Re-write every 'netN' entry of $settings in place through a parse/print
# round trip; parse_net() fills in a random MAC address where none is set.
# Entries that fail to parse are left untouched.
sub add_random_macs {
    my ($settings) = @_;

    for my $opt (grep { m/^net(\d+)$/ } keys %$settings) {
        my $net = parse_net($settings->{$opt}) or next;
        $settings->{$opt} = print_net($net);
    }
}
1913
# Return 1 if the storage layer reports VM $vmid as owner of volume $volid.
# Absolute paths are never owned; errors while resolving the volume are
# ignored and count as "not owned" (nothing is returned).
sub vm_is_volid_owner {
    my ($storecfg, $vmid, $volid) = @_;

    return if $volid =~ m|^/|;

    # best effort: on error the list stays empty and $owner undefined
    my ($path, $owner) = eval { PVE::Storage::path($storecfg, $volid) };

    return 1 if $owner && ($owner == $vmid);

    return;
}
1927
# Handle a drive that is no longer referenced by the VM config.
#
# A cloud-init drive is freed right away (errors are only warned about) and
# the 'cloudinit' key is dropped from $conf. Any other non-CD-ROM volume owned
# by the VM is recorded as unused volume in $conf.
sub vmconfig_register_unused_drive {
    my ($storecfg, $vmid, $conf, $drive) = @_;

    if (drive_is_cloudinit($drive)) {
        my $cloudinit_volid = $drive->{file};
        eval { PVE::Storage::vdisk_free($storecfg, $cloudinit_volid) };
        warn $@ if $@;
        delete $conf->{cloudinit};
    } elsif (!drive_is_cdrom($drive)) {
        my $volid = $drive->{file};
        PVE::QemuConfig->add_unused_volume($conf, $volid, $vmid)
            if vm_is_volid_owner($storecfg, $vmid, $volid);
    }
}
1942
# smbios: [manufacturer=str][,product=str][,version=str][,serial=str][,uuid=uuid][,sku=str][,family=str][,base64=bool]
my $smbios1_fmt = {
    uuid => {
        type => 'string',
        pattern => '[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}',
        format_description => 'UUID',
        description => "Set SMBIOS1 UUID.",
        optional => 1,
    },
    base64 => {
        type => 'boolean',
        description => 'Flag to indicate that the SMBIOS values are base64 encoded',
        optional => 1,
    },
};

# All free-text fields share the same schema and only differ in description.
for my $field (
    [version => "Set SMBIOS1 version."],
    [serial => "Set SMBIOS1 serial number."],
    [manufacturer => "Set SMBIOS1 manufacturer."],
    [product => "Set SMBIOS1 product ID."],
    [sku => "Set SMBIOS1 SKU string."],
    [family => "Set SMBIOS1 family string."],
) {
    my ($name, $description) = @$field;
    $smbios1_fmt->{$name} = {
        type => 'string',
        pattern => '[A-Za-z0-9+\/]+={0,2}',
        format_description => 'Base64 encoded string',
        description => $description,
        optional => 1,
    };
}
2000
# Parse an smbios1 property string according to $smbios1_fmt.
# A parse failure is only reported via warn; undef is returned in that case.
sub parse_smbios1 {
    my ($data) = @_;

    my $parsed = eval { parse_property_string($smbios1_fmt, $data) };
    if ($@) {
        warn $@;
    }

    return $parsed;
}
2008
# Serialize an smbios1 hash into its property-string form, using $smbios1_fmt as schema.
sub print_smbios1 {
    my ($smbios1) = @_;

    return PVE::JSONSchema::print_property_string(
        $smbios1,
        $smbios1_fmt,
    );
}

# make the schema available under the format name 'pve-qm-smbios1'
PVE::JSONSchema::register_format('pve-qm-smbios1', $smbios1_fmt);
2015
# Parse a watchdog property string according to $watchdog_fmt.
# Returns nothing for a false/empty value; a parse failure is warned about and yields undef.
sub parse_watchdog {
    my ($value) = @_;

    return unless $value;

    my $parsed = eval { parse_property_string($watchdog_fmt, $value) };
    if ($@) {
        warn $@;
    }

    return $parsed;
}
2025
# Parse the 'agent' option of a VM config according to $agent_fmt.
# Returns an empty hash if the option is not set, could not be parsed (a warning is emitted)
# or the agent is not enabled.
sub parse_guest_agent {
    my ($conf) = @_;

    my $agent_conf = $conf->{agent};
    return {} if !defined($agent_conf);

    my $parsed = eval { parse_property_string($agent_fmt, $agent_conf) };
    warn $@ if $@;

    # other properties are irrelevant while the agent is disabled
    if ($parsed->{enabled}) {
        return $parsed;
    }
    return {};
}
2038
# Look up a single property of the guest agent configuration.
# Returns undef (explicitly, also in list context) when no 'agent' option is configured.
sub get_qga_key {
    my ($conf, $key) = @_;

    if (!defined($conf->{agent})) {
        return undef;
    }

    my $agent_props = parse_guest_agent($conf);
    return $agent_props->{$key};
}
2046
# Parse a vga property string according to $vga_fmt.
# A false/empty value yields an empty hash; a parse failure is warned about and yields undef.
sub parse_vga {
    my ($value) = @_;

    if (!$value) {
        return {};
    }

    my $parsed = eval { parse_property_string($vga_fmt, $value) };
    if ($@) {
        warn $@;
    }

    return $parsed;
}
2055
# Parse an rng property string according to $rng_fmt.
# Returns nothing for a false/empty value; a parse failure is warned about and yields undef.
sub parse_rng {
    my ($value) = @_;

    return unless $value;

    my $parsed = eval { parse_property_string($rng_fmt, $value) };
    if ($@) {
        warn $@;
    }

    return $parsed;
}
2065
# Parse a 'meta' property string according to $meta_info_fmt.
# Returns nothing for a false/empty value; a parse failure is warned about and yields undef.
sub parse_meta_info {
    my ($value) = @_;

    return unless $value;

    my $parsed = eval { parse_property_string($meta_info_fmt, $value) };
    if ($@) {
        warn $@;
    }

    return $parsed;
}
2075
# Build a fresh 'meta' property string from the value of kvm_user_version() and the current
# time (integer seconds, stored as string).
sub new_meta_info_string {
    my () = @_; # for now do not allow to override any value

    my $meta = {
        'creation-qemu' => kvm_user_version(),
        ctime => "". int(time()),
    };

    return PVE::JSONSchema::print_property_string($meta, $meta_info_fmt);
}
2087
# Return extra QEMU command line arguments (array ref) needed to keep the guest-visible hardware
# stable for VMs that always use the latest machine version but were created before a machine
# version transition that changed hardware. Returns nothing if no fixup applies.
#
# We do not want to pin the machine version for non-windows OS types, hence this handling.
sub qemu_created_version_fixups {
    my ($conf, $forcemachine, $kvmver) = @_;

    my $meta = parse_meta_info($conf->{meta}) // {};
    my $forced_vers = PVE::QemuServer::Machine::extract_version($forcemachine);
    my $machine_conf = PVE::QemuServer::Machine::parse_machine($conf->{machine});

    # only non-versioned machine types are affected
    my $machine_type = $machine_conf->{type};
    return if defined($machine_type) && $machine_type !~ m/^(?:pc|q35|virt)$/;

    # only VMs created before 6.1 (or without creation info) are affected
    my $created_with = $meta->{'creation-qemu'};
    return if defined($created_with) && min_version($created_with, 6, 1);

    # handle snapshot-rollback/migrations: a forced version older than 6.1 needs no fixup
    return if $forced_vers && !min_version($forced_vers, 6, 1);

    # only need to apply the change since 6.1
    return if !min_version($kvmver, 6, 1);

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    if ($q35 && $conf->{ostype} && $conf->{ostype} eq 'l26') {
        # this changed to default-on in Q 6.1 for q35 machines, it will mess with PCI slot view
        # and thus with the predictable interface naming of systemd
        return ['-global', 'ICH9-LPC.acpi-pci-hotplug-with-bridge-support=off'];
    }

    return;
}
2113
# Add the JSON schema properties for the create and set API calls to $prop and return it.
# With $with_disk_alloc set, drive options use the schema variant from
# $PVE::QemuServer::Drive::drivedesc_hash_with_alloc instead of the one in $confdesc.
sub json_config_properties {
    my ($prop, $with_disk_alloc) = @_;

    # options from $confdesc that are not copied into the schema
    my %skipped = map { $_ => 1 } qw(parent snaptime vmstate runningmachine runningcpu meta);

    for my $opt (keys %$confdesc) {
        next if $skipped{$opt};

        my $use_alloc_schema = $with_disk_alloc && is_valid_drivename($opt);
        $prop->{$opt} = $use_alloc_schema
            ? $PVE::QemuServer::Drive::drivedesc_hash_with_alloc->{$opt}
            : $confdesc->{$opt};
    }

    return $prop;
}
2139
# JSON schema of the properties that we can read from an OVF file: one optional disk image
# per valid drive name plus 'cores', 'memory' and 'name'.
sub json_ovf_properties {
    my $prop = {
        (
            map {
                my $device = $_;
                ($device => {
                    type => 'string',
                    format => 'pve-volume-id-or-absolute-path',
                    description => "Disk image that gets imported to $device",
                    optional => 1,
                });
            } PVE::QemuServer::Drive::valid_drive_names()
        ),
        cores => {
            type => 'integer',
            description => "The number of CPU cores.",
            optional => 1,
        },
        memory => {
            type => 'integer',
            description => "Amount of RAM for the VM in MB.",
            optional => 1,
        },
        name => {
            type => 'string',
            description => "Name of the VM.",
            optional => 1,
        },
    };

    return $prop;
}
2171
# Hand out a deep copy of $confdesc_cloudinit (used to generate documentation), so that
# callers cannot modify the original schema.
sub cloudinit_config_properties {
    my $schema_copy = dclone($confdesc_cloudinit);
    return $schema_copy;
}
2177
# Return a hash (property name => 1) of the config properties that are tracked for cloud-init:
# every key of $confdesc_cloudinit, 'name', and net0 .. net($MAX_NETS-1).
sub cloudinit_pending_properties {
    my $p = {
        # The parentheses are required: 'keys' is a unary operator, so without them the
        # following "name => 1" pair would become part of map's input list and the result
        # would contain a bogus key '1'.
        (map { $_ => 1 } keys $confdesc_cloudinit->%*),
        name => 1,
    };
    $p->{"net$_"} = 1 for 0..($MAX_NETS-1);
    return $p;
}
2186
# Validate $value against the type that $confdesc declares for option $key and return the
# normalized value:
#   boolean - 1/on/yes/true => 1, 0/off/no/false => 0 (words are case-insensitive)
#   integer - unsigned decimal digits, returned via int()
#   number  - unsigned decimal with optional fraction, returned unchanged
#   string  - checked with PVE::JSONSchema::check_format if the option declares a format,
#             otherwise one pair of enclosing double quotes is stripped
# Dies on unknown keys, undefined values, values containing CR/LF and type mismatches.
sub check_type {
    my ($key, $value) = @_;

    die "unknown setting '$key'\n" if !$confdesc->{$key};

    my $type = $confdesc->{$key}->{type};

    die "got undefined value\n" if !defined($value);

    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    if ($type eq 'string') {
        my $fmt = $confdesc->{$key}->{format};
        if ($fmt) {
            PVE::JSONSchema::check_format($fmt, $value);
            return $value;
        }
        $value =~ s/^\"(.*)\"$/$1/;
        return $value;
    }

    die "internal error"
}
2223
# Remove a VM: free the volumes it owns and finally either replace or destroy its config.
#
# Arguments:
#   $storecfg           - storage configuration
#   $vmid               - ID of the VM
#   $skiplock           - if true, the config lock check is skipped
#   $replacement_conf   - if defined, written as new config instead of destroying the config
#   $purge_unreferenced - if true, also free all 'images' volumes listed for $vmid
sub destroy_vm {
    my ($storecfg, $vmid, $skiplock, $replacement_conf, $purge_unreferenced) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    # the lock check is also skipped when the config carries the 'suspended' lock
    unless ($skiplock || PVE::QemuConfig->has_lock($conf, 'suspended')) {
        PVE::QemuConfig->check_lock($conf);
    }

    if ($conf->{template}) {
        # check if any base image is still used by a linked clone
        my $assert_base_unused = sub {
            my ($ds, $drive) = @_;
            return if drive_is_cdrom($drive);

            my $volid = $drive->{file};
            return if !$volid || $volid =~ m|^/|;

            die "base volume '$volid' is still in use by linked cloned\n"
                if PVE::Storage::volume_is_base_and_used($storecfg, $volid);
        };
        PVE::QemuConfig->foreach_volume_full($conf, { include_unused => 1 }, $assert_base_unused);
    }

    # volumes already handled, so that each one is freed at most once
    my $seen_volids = {};

    my $remove_owned_drive = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        # skip empty entries and absolute paths
        return if !$volid || $volid =~ m|^/|;
        return if $seen_volids->{$volid};

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        $seen_volids->{$volid} = 1;
        eval { PVE::Storage::vdisk_free($storecfg, $volid) };
        warn "Could not remove disk '$volid', check manually: $@" if $@;
    };

    # only remove disks owned by this VM (referenced in the config)
    my $include_opts = {
        include_unused => 1,
        extra_keys => ['vmstate'],
    };
    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $remove_owned_drive);

    # state volumes of snapshots
    for my $snap (values %{$conf->{snapshots}}) {
        next if !defined($snap->{vmstate});
        my $drive = PVE::QemuConfig->parse_volume('vmstate', $snap->{vmstate}, 1);
        next if !defined($drive);
        $remove_owned_drive->('vmstate', $drive);
    }

    # volumes that are only referenced in the pending section
    PVE::QemuConfig->foreach_volume_full($conf->{pending}, $include_opts, $remove_owned_drive);

    if ($purge_unreferenced) { # also remove unreferenced disk
        my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid, undef, 'images');
        PVE::Storage::foreach_volid($vmdisks, sub {
            my ($volid) = @_;
            eval { PVE::Storage::vdisk_free($storecfg, $volid) };
            warn $@ if $@;
        });
    }

    eval { delete_ifaces_ipams_ips($conf, $vmid)};
    warn $@ if $@;

    if (defined $replacement_conf) {
        PVE::QemuConfig->write_config($vmid, $replacement_conf);
    } else {
        PVE::QemuConfig->destroy_config($vmid);
    }
}
2299
# Parse the raw content of a VM config file into a nested hash.
#
# Arguments:
#   $filename - path of the config file, must end in '/qemu-server/<vmid>.conf'
#   $raw      - file content; nothing is returned if it is undefined
#   $strict   - if true, parse errors are fatal (die) instead of only being warned about
#
# Returns a hash holding the top-level settings plus 'digest' (SHA1 hex of $raw) and the
# sub-sections 'pending', 'cloudinit' and 'snapshots' (keyed by section name).
sub parse_vm_config {
    my ($filename, $raw, $strict) = @_;

    return if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
        pending => {},
        cloudinit => {},
    };

    my $handle_error = sub {
        my ($msg) = @_;

        if ($strict) {
            die $msg;
        } else {
            warn $msg;
        }
    };

    $filename =~ m|/qemu-server/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    # $conf always points to the hash of the section that is currently being parsed
    my $conf = $res;
    my $descr;
    # store the description collected so far in the current section
    my $finish_description = sub {
        if (defined($descr)) {
            $descr =~ s/\s+$//;
            $conf->{description} = $descr;
        }
        $descr = undef;
    };
    my $section = '';

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        # section headers: finish the previous section, then switch $conf
        if ($line =~ m/^\[PENDING\]\s*$/i) {
            $section = 'pending';
            $finish_description->();
            $conf = $res->{$section} = {};
            next;
        } elsif ($line =~ m/^\[special:cloudinit\]\s*$/i) {
            $section = 'cloudinit';
            $finish_description->();
            $conf = $res->{$section} = {};
            next;

        } elsif ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            # any other section is a snapshot
            $section = $1;
            $finish_description->();
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        # comment lines form the description
        if ($line =~ m/^\#(.*)$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # anchored at line start: otherwise any line merely ending in
            # 'snapstate: prepare|delete' (e.g. an 'args:' line) would be taken as snapstate
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            # stored without type check
            my $key = $1;
            my $value = $2;
            $conf->{$key} = $value;
        } elsif ($line =~ m/^delete:\s*(.*\S)\s*$/) {
            my $value = $1;
            if ($section eq 'pending') {
                $conf->{delete} = $value; # we parse this later
            } else {
                $handle_error->("vm $vmid - property 'delete' is only allowed in [PENDING]\n");
            }
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(.+?)\s*$/) {
            my $key = $1;
            my $value = $2;
            if ($section eq 'cloudinit') {
                # ignore validation only used for informative purpose
                $conf->{$key} = $value;
                next;
            }
            eval { $value = check_type($key, $value); };
            if ($@) {
                $handle_error->("vm $vmid - unable to parse value of '$key' - $@");
            } else {
                # the legacy 'cdrom' option is stored as 'ide2'
                $key = 'ide2' if $key eq 'cdrom';
                my $fmt = $confdesc->{$key}->{format};
                if ($fmt && $fmt =~ /^pve-qm-(?:ide|scsi|virtio|sata)$/) {
                    # replace the drive's file with the result of filename_to_volume_id()
                    my $v = parse_drive($key, $value);
                    if (my $volid = filename_to_volume_id($vmid, $v->{file}, $v->{media})) {
                        $v->{file} = $volid;
                        $value = print_drive($v);
                    } else {
                        $handle_error->("vm $vmid - unable to parse value of '$key'\n");
                        next;
                    }
                }

                $conf->{$key} = $value;
            }
        } else {
            $handle_error->("vm $vmid - unable to parse config: $line\n");
        }
    }

    $finish_description->();
    delete $res->{snapstate}; # just to be sure

    return $res;
}
2419
# Serialize a VM config hash into the raw config file format and return it as string.
#
# $conf is normalized in place: legacy 'cdrom'/'smp' options are converted, all values are
# type-checked via check_type(), and top-level 'unusedX' entries whose volume is referenced
# by a drive of the current or pending config are dropped. $filename is not used by the
# code in this function.
#
# Dies if a value fails the type check, if both 'cdrom' and 'ide2' are set, if 'delete' is
# used outside of the pending section, or if a snapshot is named 'pending'.
sub write_vm_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    if ($conf->{cdrom}) {
        die "option ide2 conflicts with cdrom\n" if $conf->{ide2};
        $conf->{ide2} = $conf->{cdrom};
        delete $conf->{cdrom};
    }

    # we do not use 'smp' any longer
    if ($conf->{sockets}) {
        delete $conf->{smp};
    } elsif ($conf->{smp}) {
        $conf->{sockets} = $conf->{smp};
        delete $conf->{cores};
        delete $conf->{smp};
    }

    # volumes referenced by drives of the current or pending config (not of snapshots)
    my $used_volids = {};

    my $cleanup_config = sub {
        my ($cref, $pending, $snapname) = @_;

        foreach my $key (keys %$cref) {
            next if $key eq 'digest' || $key eq 'description' || $key eq 'snapshots' ||
                $key eq 'snapstate' || $key eq 'pending' || $key eq 'cloudinit';
            my $value = $cref->{$key};
            if ($key eq 'delete') {
                die "property 'delete' is only allowed in [PENDING]\n"
                    if !$pending;
                # fixme: check syntax?
                next;
            }
            eval { $value = check_type($key, $value); };
            die "unable to parse value of '$key' - $@" if $@;

            $cref->{$key} = $value;

            if (!$snapname && is_valid_drivename($key)) {
                my $drive = parse_drive($key, $value);
                $used_volids->{$drive->{file}} = 1 if $drive && $drive->{file};
            }
        }
    };

    $cleanup_config->($conf);

    $cleanup_config->($conf->{pending}, 1);

    foreach my $snapname (keys %{$conf->{snapshots}}) {
        die "internal error: snapshot name '$snapname' is forbidden" if lc($snapname) eq 'pending';
        $cleanup_config->($conf->{snapshots}->{$snapname}, undef, $snapname);
    }

    # remove 'unusedX' settings if we re-add a volume
    foreach my $key (keys %$conf) {
        my $value = $conf->{$key};
        if ($key =~ m/^unused/ && $used_volids->{$value}) {
            delete $conf->{$key};
        }
    }

    my $generate_raw_config = sub {
        my ($conf, $pending) = @_;

        my $raw = '';

        # add description as comment to top of file
        if (defined(my $descr = $conf->{description})) {
            if ($descr) {
                foreach my $cl (split(/\n/, $descr)) {
                    $raw .= '#' . PVE::Tools::encode_text($cl) . "\n";
                }
            } else {
                # an empty description is only written out in the pending section
                $raw .= "#\n" if $pending;
            }
        }

        foreach my $key (sort keys %$conf) {
            next if $key =~ /^(digest|description|pending|cloudinit|snapshots)$/;
            $raw .= "$key: $conf->{$key}\n";
        }
        return $raw;
    };

    my $raw = $generate_raw_config->($conf);

    if (scalar(keys %{$conf->{pending}})){
        $raw .= "\n[PENDING]\n";
        $raw .= $generate_raw_config->($conf->{pending}, 1);
    }

    if (scalar(keys %{$conf->{cloudinit}}) && PVE::QemuConfig->has_cloudinit($conf)){
        $raw .= "\n[special:cloudinit]\n";
        $raw .= $generate_raw_config->($conf->{cloudinit});
    }

    foreach my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n";
        $raw .= $generate_raw_config->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
2526
# Collect the static defaults declared in our JSON schema configuration ($confdesc) into a
# hash (option name => default). Options without a defined default are left out.
sub load_defaults {
    my $res = {};

    for my $key (keys %$confdesc) {
        my $default = $confdesc->{$key}->{default};
        next if !defined($default);
        $res->{$key} = $default;
    }

    return $res;
}
2540
# Return a hash (vmid => { exists => 1 }) with all guests from the cluster-wide VM list that
# are of type 'qemu' and located on the node returned by nodename().
sub config_list {
    my $res = {};

    my $vmlist = PVE::Cluster::get_vmlist();
    if (!$vmlist || !$vmlist->{ids}) {
        return $res;
    }

    my $ids = $vmlist->{ids};
    my $local_node = nodename();

    for my $vmid (keys %$ids) {
        my $entry = $ids->{$vmid};
        next if !$entry->{node} || $entry->{node} ne $local_node;
        next if !$entry->{type} || $entry->{type} ne 'qemu';
        $res->{$vmid}->{exists} = 1;
    }

    return $res;
}
2556
# Test if a VM uses local resources (to prevent migration).
#
# Dies with "VM uses local resources" if any are found, unless $noerr is set.
# In list context returns (\@loc_res, $mapped_res, $missing_mappings_by_node):
#   \@loc_res                 - config keys that count as local resources
#   $mapped_res               - usb/hostpci keys that use a resource mapping
#   $missing_mappings_by_node - per cluster node, the keys whose mapping has no entry there
# In scalar context only \@loc_res is returned.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    my @loc_res = ();
    my $mapped_res = [];

    my $nodelist = PVE::Cluster::get_nodelist();
    my $pci_map = PVE::Mapping::PCI::config();
    my $usb_map = PVE::Mapping::USB::config();

    my $missing_mappings_by_node = { map { $_ => [] } @$nodelist };

    # note $key as missing for every node on which mapping $id has no entry
    my $add_missing_mapping = sub {
        my ($type, $key, $id) = @_;
        for my $node (@$nodelist) {
            my $entry;
            if ($type eq 'pci') {
                $entry = PVE::Mapping::PCI::get_node_mapping($pci_map, $id, $node);
            } elsif ($type eq 'usb') {
                $entry = PVE::Mapping::USB::get_node_mapping($usb_map, $id, $node);
            }
            next if scalar($entry->@*);
            push @{$missing_mappings_by_node->{$node}}, $key;
        }
    };

    # old syntax
    for my $legacy_key (qw(hostusb hostpci)) {
        push @loc_res, $legacy_key if $conf->{$legacy_key};
    }

    push @loc_res, "ivshmem" if $conf->{ivshmem};

    for my $k (keys %$conf) {
        if ($k =~ m/^usb/) {
            my $entry = parse_property_string('pve-qm-usb', $conf->{$k});
            # usb entries with host 'spice' are not counted as local resource
            next if $entry->{host} && $entry->{host} =~ m/^spice$/i;
            if (my $mapping = $entry->{mapping}) {
                $add_missing_mapping->('usb', $k, $mapping);
                push @$mapped_res, $k;
            }
        } elsif ($k =~ m/^hostpci/) {
            my $entry = parse_property_string('pve-qm-hostpci', $conf->{$k});
            if (my $mapping = $entry->{mapping}) {
                $add_missing_mapping->('pci', $k, $mapping);
                push @$mapped_res, $k;
            }
        }

        # sockets are safe: they will be recreated on the target side post-migrate
        next if $k =~ m/^serial/ && ($conf->{$k} eq 'socket');

        push @loc_res, $k if $k =~ m/^(usb|hostpci|serial|parallel)\d+$/;
    }

    die "VM uses local resources\n" if scalar @loc_res && !$noerr;

    return wantarray ? (\@loc_res, $mapped_res, $missing_mappings_by_node) : \@loc_res;
}
2615
# Check that the storages used by the VM's volumes are enabled locally and on $node (used by
# migrate). Dies (via PVE::Storage::storage_check_enabled or the content type check below)
# otherwise. Volumes whose ID has no storage part are skipped.
sub check_storage_availability {
    my ($storecfg, $conf, $node) = @_;

    my $check_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        # check if storage is available on both nodes
        my $scfg = PVE::Storage::storage_check_enabled($storecfg, $sid);
        PVE::Storage::storage_check_enabled($storecfg, $sid, $node);

        my ($vtype) = PVE::Storage::parse_volname($storecfg, $volid);

        if (!$scfg->{content}->{$vtype}) {
            die "$volid: content type '$vtype' is not available on storage '$sid'\n";
        }
    };

    PVE::QemuConfig->foreach_volume($conf, $check_volume);
}
2639
# List nodes where all VM images are available (used by has_feature API).
# Returns a hash (node => 1). Starting from all cluster nodes, each volume's storage narrows
# the set: a disabled storage leaves no node, a storage restricted to certain nodes keeps only
# those, and a non-shared storage keeps only the local node.
sub shared_nodes {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => 1 } @$nodelist };
    my $nodename = nodename();

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        if ($scfg->{disable}) {
            $nodehash = {};
        } elsif (my $avail = $scfg->{nodes}) {
            delete $nodehash->{$_} for grep { !$avail->{$_} } keys %$nodehash;
        } elsif (!$scfg->{shared}) {
            delete $nodehash->{$_} for grep { $_ ne $nodename } keys %$nodehash;
        }
    });

    return $nodehash;
}
2673
# For every cluster node, determine which storages used by the VM's volumes are not available
# there (storage disabled, or restricted to other nodes).
# Returns a hash (node => {}), where affected nodes carry the key 'unavailable_storages'
# holding a sorted list of storage IDs.
sub check_local_storage_availability {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => {} } @$nodelist };

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        if ($scfg->{disable}) {
            # a disabled storage is unavailable everywhere
            for my $node (keys %$nodehash) {
                $nodehash->{$node}->{unavailable_storages}->{$storeid} = 1;
            }
        } elsif (my $avail = $scfg->{nodes}) {
            for my $node (grep { !$avail->{$_} } keys %$nodehash) {
                $nodehash->{$node}->{unavailable_storages}->{$storeid} = 1;
            }
        }
    });

    # turn the collected sets into sorted lists
    for my $info (values %$nodehash) {
        my $unavail = $info->{unavailable_storages};
        next if !$unavail;
        $info->{unavailable_storages} = [ sort keys %$unavail ];
    }

    return $nodehash;
}
2712
# Compat only, use assert_config_exists_on_node and vm_running_locally where possible
#
# Returns the result of PVE::QemuServer::Helpers::vm_running_locally($vmid). Unless $nocheck
# is set, it is first asserted that the config exists on $node.
sub check_running {
    my ($vmid, $nocheck, $node) = @_;

    # $nocheck is set when called during a migration, in which case the config
    # file might still or already reside on the *other* node
    # - because rename has already happened, and current node is source
    # - because rename hasn't happened yet, and current node is target
    # - because rename has happened, current node is target, but hasn't
    #   processed it yet
    unless ($nocheck) {
        PVE::QemuConfig::assert_config_exists_on_node($vmid, $node);
    }

    return PVE::QemuServer::Helpers::vm_running_locally($vmid);
}
2726
# Return the result of config_list(), extended by the 'pid' of each VM for which a
# '<vmid>.pid' file exists in the run directory and check_running() returns a true value.
sub vzlist {
    my $vzlist = config_list();

    my $run_dir = IO::Dir->new($PVE::QemuServer::Helpers::var_run_tmpdir);
    return $vzlist if !$run_dir;

    while (defined(my $entry = $run_dir->read)) {
        my ($vmid) = $entry =~ m/^(\d+)\.pid$/;
        next if !defined($vmid);
        next if !defined($vzlist->{$vmid});

        my $pid = check_running($vmid);
        $vzlist->{$vmid}->{pid} = $pid if $pid;
    }

    return $vzlist;
}
2744
# Schema of the per-VM status properties. 'vmid' and 'status' are mandatory; all other
# properties are optional and generated from the table below.
our $vmstatus_return_properties = do {
    # [ property, type, description, renderer (if any) ]
    my @optional_props = (
        [ 'maxmem', 'integer', "Maximum memory in bytes.", 'bytes' ],
        [ 'maxdisk', 'integer', "Root disk size in bytes.", 'bytes' ],
        [ 'name', 'string', "VM name." ],
        [ 'qmpstatus', 'string', "VM run state from the 'query-status' QMP monitor command." ],
        [ 'pid', 'integer', "PID of running qemu process." ],
        [ 'uptime', 'integer', "Uptime.", 'duration' ],
        [ 'cpus', 'number', "Maximum usable CPUs." ],
        [ 'lock', 'string', "The current config lock, if any." ],
        [ 'tags', 'string', "The current configured tags, if any" ],
        [ 'running-machine', 'string', "The currently running machine type (if running)." ],
        [ 'running-qemu', 'string', "The currently running QEMU version (if running)." ],
    );

    my $props = {
        vmid => get_standard_option('pve-vmid'),
        status => {
            description => "QEMU process status.",
            type => 'string',
            enum => ['stopped', 'running'],
        },
    };

    for my $row (@optional_props) {
        my ($name, $type, $description, $renderer) = @$row;
        $props->{$name} = {
            description => $description,
            type => $type,
            optional => 1,
        };
        $props->{$name}->{renderer} = $renderer if defined($renderer);
    }

    $props;
};
2811
# CPU accounting sample per PID (time, used ticks, last computed cpu value) kept between
# vmstatus() calls, so that CPU usage can be computed from the difference of two samples.
my $last_proc_pid_stat;

# Get VM status information, as hash keyed by VMID.
#
# Arguments:
#   $opt_vmid - if set, only this VM is reported
#   $full     - if true, running VMs are additionally queried via QMP (this can be slow);
#               without it this must be fast and should not block
sub vmstatus {
    my ($opt_vmid, $full) = @_;

    my $res = {};

    my $storecfg = PVE::Storage::config();

    my $list = vzlist();
    my $defaults = load_defaults();

    my ($uptime) = PVE::ProcFSTools::read_proc_uptime(1);

    my $cpucount = $cpuinfo->{cpus} || 1;

    my $MiB = 1024*1024;

    # pass 1: static information from the config
    for my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);

        my $conf = PVE::QemuConfig->load_config($vmid);
        my $pid = $list->{$vmid}->{pid};

        my $d = { vmid => int($vmid) };
        $d->{pid} = int($pid) if $pid;

        # fixme: better status?
        $d->{status} = $pid ? 'running' : 'stopped';

        my $size = PVE::QemuServer::Drive::bootdisk_size($storecfg, $conf);
        $d->{disk} = 0; # no info available
        $d->{maxdisk} = defined($size) ? $size : 0;

        # sockets * cores, capped at the host CPU count; an explicit 'vcpus' wins
        my $sockets = $conf->{sockets} || $defaults->{sockets};
        my $cores = $conf->{cores} || $defaults->{cores};
        my $cpus = $sockets * $cores;
        $cpus = $cpucount if $cpus > $cpucount;
        $cpus = $conf->{vcpus} if $conf->{vcpus};
        $d->{cpus} = $cpus;

        $d->{name} = $conf->{name} || "VM $vmid";
        $d->{maxmem} = get_current_memory($conf->{memory})*$MiB;

        if ($conf->{balloon}) {
            $d->{balloon_min} = $conf->{balloon}*$MiB;
            $d->{shares} = defined($conf->{shares}) ? $conf->{shares} : $defaults->{shares};
        }

        # counters default to 0 and are filled in below where possible
        $d->{$_} = 0 for qw(uptime cpu mem netout netin diskread diskwrite);

        $d->{template} = 1 if PVE::QemuConfig->is_template($conf);

        $d->{serial} = 1 if conf_has_serial($conf);
        $d->{lock} = $conf->{lock} if $conf->{lock};
        $d->{tags} = $conf->{tags} if defined($conf->{tags});

        $res->{$vmid} = $d;
    }

    # pass 2: network counters of the tap devices; the host-side 'receive' counter is
    # accounted as the VM's 'netout' and 'transmit' as its 'netin'
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    for my $dev (keys %$netdev) {
        next if $dev !~ m/^tap([1-9]\d*)i/;
        my $vmid = $1;
        my $d = $res->{$vmid};
        next if !$d;

        my $counters = $netdev->{$dev};

        $d->{netout} += $counters->{receive};
        $d->{netin} += $counters->{transmit};

        if ($full) {
            $d->{nics}->{$dev}->{netout} = int($counters->{receive});
            $d->{nics}->{$dev}->{netin} = int($counters->{transmit});
        }
    }

    my $ctime = gettimeofday;

    # pass 3: uptime, memory and CPU usage from /proc for running VMs
    for my $vmid (keys %$list) {
        my $d = $res->{$vmid};
        my $pid = $d->{pid};
        next if !$pid;

        my $pstat = PVE::ProcFSTools::read_proc_pid_stat($pid);
        next if !$pstat; # not running

        my $used = $pstat->{utime} + $pstat->{stime};

        $d->{uptime} = int(($uptime - $pstat->{starttime})/$cpuinfo->{user_hz});

        if ($pstat->{vsize}) {
            $d->{mem} = int(($pstat->{rss}/$pstat->{vsize})*$d->{maxmem});
        }

        my $old = $last_proc_pid_stat->{$pid};
        if (!$old) {
            # first sample for this PID, no usage can be computed yet
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($ctime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => $d->{cpu},
            };
        } else {
            # below the threshold: report the previously computed value again
            $d->{cpu} = $old->{cpu};
        }
    }

    return $res if !$full;

    my $qmpclient = PVE::QMPClient->new();

    my $ballooncb = sub {
        my ($vmid, $resp) = @_;

        my $info = $resp->{'return'};
        return if !$info->{max_mem};

        my $d = $res->{$vmid};

        # use memory assigned to VM
        $d->{maxmem} = $info->{max_mem};
        $d->{balloon} = $info->{actual};

        if (defined($info->{total_mem}) && defined($info->{free_mem})) {
            $d->{mem} = $info->{total_mem} - $info->{free_mem};
            $d->{freemem} = $info->{free_mem};
        }

        $d->{ballooninfo} = $info;
    };

    my $blockstatscb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} || [];
        my $totalrdbytes = 0;
        my $totalwrbytes = 0;

        for my $blockstat (@$data) {
            $totalrdbytes += $blockstat->{stats}->{rd_bytes};
            $totalwrbytes += $blockstat->{stats}->{wr_bytes};

            $blockstat->{device} =~ s/drive-//;
            $res->{$vmid}->{blockstat}->{$blockstat->{device}} = $blockstat->{stats};
        }
        $res->{$vmid}->{diskread} = $totalrdbytes;
        $res->{$vmid}->{diskwrite} = $totalwrbytes;
    };

    my $machinecb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} || [];

        $res->{$vmid}->{'running-machine'} =
            PVE::QemuServer::Machine::current_from_query_machines($data);
    };

    my $versioncb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} // {};
        my $version = 'unknown';

        if (my $v = $data->{qemu}) {
            $version = $v->{major} . "." . $v->{minor} . "." . $v->{micro};
        }

        $res->{$vmid}->{'running-qemu'} = $version;
    };

    my $statuscb = sub {
        my ($vmid, $resp) = @_;

        $qmpclient->queue_cmd($vmid, $blockstatscb, 'query-blockstats');
        $qmpclient->queue_cmd($vmid, $machinecb, 'query-machines');
        $qmpclient->queue_cmd($vmid, $versioncb, 'query-version');
        # this fails if the balloon driver is not loaded, so this must be
        # the last command (following commands are aborted if this fails).
        $qmpclient->queue_cmd($vmid, $ballooncb, 'query-balloon');

        my $status = $resp->{'return'}->{status};
        if (!defined($status)) {
            warn "unable to get VM status\n";
            return;
        }

        $res->{$vmid}->{qmpstatus} = $status;
    };

    for my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        next if !$res->{$vmid}->{pid}; # not running
        $qmpclient->queue_cmd($vmid, $statuscb, 'query-status');
    }

    $qmpclient->queue_execute(undef, 2);

    for my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        next if !$res->{$vmid}->{pid}; #not running

        # we can't use the $qmpclient since it might have already aborted on
        # 'query-balloon', but this might also fail for older versions...
        my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };
        $res->{$vmid}->{'proxmox-support'} = $qemu_support // {};
    }

    # fall back to the process status where no QMP status was obtained
    for my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        $res->{$vmid}->{qmpstatus} = $res->{$vmid}->{status} if !$res->{$vmid}->{qmpstatus};
    }

    return $res;
}
3051
# Return 1 if any of the options serial0 .. serial($MAX_SERIAL_PORTS-1) is set to a true
# value in $conf, 0 otherwise.
sub conf_has_serial {
    my ($conf) = @_;

    for my $i (0 .. $MAX_SERIAL_PORTS - 1) {
        return 1 if $conf->{"serial$i"};
    }

    return 0;
}
3063
# Describe the audio device 'audio$id' ($id defaults to 0) of a VM config.
# Returns nothing if the option is not set, otherwise a hash with the device model ('dev'),
# the backend driver ('backend', 'spice' if none is configured) and the IDs used for both.
sub conf_has_audio {
    my ($conf, $id) = @_;

    $id //= 0;
    my $audio = $conf->{"audio$id"};
    return if !defined($audio);

    my $audioproperties = parse_property_string($audio_fmt, $audio);
    my $audiodriver = $audioproperties->{driver} // 'spice';

    my $info = {};
    $info->{dev} = $audioproperties->{device};
    $info->{dev_id} = "audiodev$id";
    $info->{backend} = $audiodriver;
    $info->{backend_id} = "$audiodriver-backend${id}";

    return $info;
}
3081
# Build the QEMU command line arguments for an audio device.
#
# Arguments:
#   $audio           - hash with the keys 'dev', 'dev_id', 'backend' and 'backend_id'
#   $audiopciaddr    - string appended to the device definition
#                      (presumably the PCI address part - confirm against caller)
#   $machine_version - machine version; from 4.2 on, devices reference the backend via
#                      'audiodev='
#
# Returns an array ref of arguments; dies for unsupported device models.
sub audio_devs {
    my ($audio, $audiopciaddr, $machine_version) = @_;

    my $devs = [];

    my $id = $audio->{dev_id};
    my $audiodev = "";
    if (min_version($machine_version, 4, 2)) {
        $audiodev = ",audiodev=$audio->{backend_id}";
    }

    if ($audio->{dev} eq 'AC97') {
        push @$devs, '-device', "AC97,id=${id}${audiopciaddr}$audiodev";
    } elsif ($audio->{dev} =~ /intel\-hda$/) {
        # the HDA controller needs codec devices attached to its bus
        push @$devs, '-device', "$audio->{dev},id=${id}${audiopciaddr}";
        push @$devs, '-device', "hda-micro,id=${id}-codec0,bus=${id}.0,cad=0$audiodev";
        push @$devs, '-device', "hda-duplex,id=${id}-codec1,bus=${id}.0,cad=1$audiodev";
    } else {
        die "unknown audio device '$audio->{dev}', implement me!";
    }

    push @$devs, '-audiodev', "$audio->{backend},id=$audio->{backend_id}";

    return $devs;
}
3107
# Return the runtime file locations used for the swtpm instance of a VM.
#
# Returns a hash reference with the keys:
#   socket - path of the swtpm control socket
#   pid    - path of the swtpm PID file
sub get_tpm_paths {
    my ($vmid) = @_;

    my $socket = "/var/run/qemu-server/$vmid.swtpm";

    return {
        socket => $socket,
        pid => "$socket.pid",
    };
}
3115
# Append the QEMU arguments for an emulated TPM to @$devices.
#
# Does nothing unless the VM config has a 'tpmstate0' entry. Otherwise adds a
# socket chardev pointing at the VM's swtpm socket, a TPM emulator backend
# using that chardev and a 'tpm-tis' device on top of it.
sub add_tpm_device {
    my ($vmid, $devices, $conf) = @_;

    return if !$conf->{tpmstate0};

    my $socket = get_tpm_paths($vmid)->{socket};

    push @$devices,
        "-chardev", "socket,id=tpmchar,path=$socket",
        "-tpmdev", "emulator,id=tpmdev,chardev=tpmchar",
        "-device", "tpm-tis,tpmdev=tpmdev";
}
3127
# Set up (if required) and start the swtpm daemon backing the TPM of a VM.
#
# $storecfg  - storage configuration, used to map the state volume
# $vmid      - ID of the VM the TPM belongs to
# $tpmdrive  - raw 'tpmstate0' config value; nothing is done if it is false
# $migration - if true, swtpm_setup is skipped because the state is received
#              from the migration source
#
# Returns the PID of the started swtpm daemon (so that it can be killed on
# error), or undef if no PID could be read from the PID file. Dies if the PID
# file does not show up in time or if one of the executed commands fails.
sub start_swtpm {
    my ($storecfg, $vmid, $tpmdrive, $migration) = @_;

    return if !$tpmdrive;

    my $state;
    my $tpm = parse_drive("tpmstate0", $tpmdrive);
    my ($storeid, $volname) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
    if ($storeid) {
        $state = PVE::Storage::map_volume($storecfg, $tpm->{file});
    } else {
        $state = $tpm->{file};
    }

    my $paths = get_tpm_paths($vmid);

    # during migration, we will get state from remote
    if (!$migration) {
        # run swtpm_setup to create a new TPM state if it doesn't exist yet
        my $setup_cmd = [
            "swtpm_setup",
            "--tpmstate",
            "file://$state",
            "--createek",
            "--create-ek-cert",
            "--create-platform-cert",
            "--lock-nvram",
            "--config",
            "/etc/swtpm_setup.conf", # do not use XDG configs
            "--runas",
            "0", # force creation as root, error if not possible
            "--not-overwrite", # ignore existing state, do not modify
        ];

        push @$setup_cmd, "--tpm2" if $tpm->{version} eq 'v2.0';
        # TPM 2.0 supports ECC crypto, use if possible
        push @$setup_cmd, "--ecc" if $tpm->{version} eq 'v2.0';

        # the output callback receives each line as its argument; $1 would
        # only hold an unrelated, stale regex capture here
        run_command($setup_cmd, outfunc => sub {
            my ($line) = @_;
            print "swtpm_setup: $line\n";
        });
    }

    # Used to distinguish different invocations in the log.
    my $log_prefix = "[id=" . int(time()) . "] ";

    my $emulator_cmd = [
        "swtpm",
        "socket",
        "--tpmstate",
        "backend-uri=file://$state,mode=0600",
        "--ctrl",
        "type=unixio,path=$paths->{socket},mode=0600",
        "--pid",
        "file=$paths->{pid}",
        "--terminate", # terminate on QEMU disconnect
        "--daemon",
        "--log",
        "file=/run/qemu-server/$vmid-swtpm.log,level=1,prefix=$log_prefix",
    ];
    push @$emulator_cmd, "--tpm2" if $tpm->{version} eq 'v2.0';
    run_command($emulator_cmd, outfunc => sub {
        my ($line) = @_;
        print "$line\n";
    });

    my $tries = 100; # swtpm may take a bit to start before daemonizing, wait up to 5s for pid
    while (! -e $paths->{pid}) {
        die "failed to start swtpm: pid file '$paths->{pid}' wasn't created.\n" if --$tries == 0;
        usleep(50_000);
    }

    # return untainted PID of swtpm daemon so it can be killed on error;
    # capture in list context so that a failed match yields undef instead of
    # whatever $1 happened to contain before
    my $first_line = file_read_firstline($paths->{pid}) // '';
    my ($pid) = $first_line =~ m/(\d+)/;
    return $pid;
}
3202
# Check whether a 'vga' config value selects a SPICE (qxl) display.
#
# Returns 0 if the parsed type is not 'qxl' or 'qxl2'..'qxl4'. Otherwise
# returns the numeric suffix of the type, or 1 for plain 'qxl'.
sub vga_conf_has_spice {
    my ($vga) = @_;

    my $vgaconf = parse_vga($vga);
    my $type = $vgaconf->{type};

    if ($type && $type =~ m/^qxl([234])?$/) {
        return $1 || 1;
    }

    return 0;
}
3212
# Return the architecture of a VM: the configured 'arch' if it is defined,
# else the architecture of the host.
sub get_vm_arch {
    my ($conf) = @_;

    my $configured = $conf->{arch};
    return defined($configured) ? $configured : get_host_arch();
}
3217
# Machine type to fall back to per guest architecture when no machine type
# was selected otherwise.
my $default_machines = {
    aarch64 => 'virt',
    x86_64 => 'pc',
};
3222
# Extract the "<major>.<minor>" prefix of a QEMU version string.
#
# $kvmversion - version string to parse; if undefined, the result of
#               kvm_user_version() is used instead
#
# Returns the "<major>.<minor>" part, or undef if the string does not start
# with such a version.
sub get_installed_machine_version {
    my ($kvmversion) = @_;

    $kvmversion = kvm_user_version() if !defined($kvmversion);

    # Capture in list context: a failed match leaves $1 untouched, so
    # returning $1 directly could hand out an unrelated capture made earlier
    # by the caller.
    my ($version) = $kvmversion =~ m/^(\d+\.\d+)/;

    return $version;
}
3229
# Turn a generic machine type into a version-pinned one.
#
# $machine      - machine type ('pc', 'q35', 'virt' or false for the default)
# $base_version - preferred version to pin to
# $kvmversion   - QEMU version, passed on to the version helpers
#
# The base version is used if it is defined and can be run according to
# can_run_pve_machine_version(), else the installed machine version is used.
# Machine types other than the generic ones trigger a warning and are
# returned unchanged.
sub windows_get_pinned_machine_version {
    my ($machine, $base_version, $kvmversion) = @_;

    my $base_usable = defined($base_version)
        && PVE::QemuServer::Machine::can_run_pve_machine_version($base_version, $kvmversion);
    my $pin_version = $base_usable ? $base_version : get_installed_machine_version($kvmversion);

    # generic machine type => prefix of the versioned machine type
    my $pinned_prefix = {
        pc => 'pc-i440fx',
        q35 => 'pc-q35',
        virt => 'virt',
    };

    my $prefix = $pinned_prefix->{ $machine || 'pc' };
    return "$prefix-$pin_version" if defined($prefix);

    warn "unknown machine type '$machine', not touching that!\n";
    return $machine;
}
3251
# Determine the machine type to use for a VM.
#
# $conf            - VM configuration
# $forcemachine    - if true, overrides the machine type from the config
# $arch            - guest architecture, 'x86_64' is assumed if undefined
# $add_pve_version - if true, make sure the result carries a '+pveN' suffix
# $kvmversion      - QEMU version; queried via kvm_user_version() when it is
#                    needed but not passed in
#
# Returns the machine type string.
sub get_vm_machine {
    my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_;

    my $machine_conf = PVE::QemuServer::Machine::parse_machine($conf->{machine});
    my $machine = $forcemachine || $machine_conf->{type};

    my $is_generic = !$machine || $machine =~ m/^(?:pc|q35|virt)$/;
    if ($is_generic) {
        $kvmversion = kvm_user_version() if !defined($kvmversion);

        # we must pin Windows VMs without a specific version to 5.1, as 5.2 fixed a bug in ACPI
        # layout which confuses windows quite a bit and may result in various regressions..
        # see: https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg08484.html
        $machine = windows_get_pinned_machine_version($machine, '5.1', $kvmversion)
            if windows_version($conf->{ostype});

        $arch = 'x86_64' if !defined($arch);
        $machine = $default_machines->{$arch} if !$machine;

        $machine .= '+pve' . PVE::QemuServer::Machine::get_pve_version($kvmversion)
            if $add_pve_version;
    }

    if ($add_pve_version && $machine !~ m/\+pve\d+?(?:\.pxe)?$/) {
        # for version-pinned machines that do not include a pve-version (e.g.
        # pc-q35-4.1), we assume 0 to keep them stable in case we bump
        if ($machine =~ m/^(.*?)\.pxe$/) {
            # keep the '.pxe' suffix at the very end
            $machine = "$1+pve0.pxe";
        } else {
            $machine .= '+pve0';
        }
    }

    return $machine;
}
3287
# Select the OVMF firmware images for a VM.
#
# $arch    - guest architecture, must have an entry in $OVMF
# $efidisk - parsed efidisk0 drive (may be undef)
# $smm     - whether the variant with SMM support should be used for the
#            '4m' images
#
# Returns the list ($ovmf_code, $ovmf_vars). Dies if the architecture is
# unknown or if one of the image files does not exist.
sub get_ovmf_files($$$) {
    my ($arch, $efidisk, $smm) = @_;

    my $types = $OVMF->{$arch};
    die "no OVMF images known for architecture '$arch'\n" if !$types;

    my $type = 'default';
    if ($arch eq 'x86_64') {
        my $efitype = $efidisk->{efitype};
        if (defined($efitype) && $efitype eq '4m') {
            my @parts = ($smm ? "4m" : "4m-no-smm");
            push @parts, 'ms' if $efidisk->{'pre-enrolled-keys'};
            $type = join('-', @parts);
        }
        # TODO: log_warn about use of legacy images for x86_64 with Proxmox VE 9
    }

    my ($code_image, $vars_image) = @{ $types->{$type} };
    if (! -f $code_image) {
        die "EFI base image '$code_image' not found\n";
    }
    if (! -f $vars_image) {
        die "EFI vars image '$vars_image' not found\n";
    }

    return ($code_image, $vars_image);
}
3310
# QEMU system emulator binary per guest architecture.
my $Arch2Qemu = {
    x86_64 => '/usr/bin/qemu-system-x86_64',
    aarch64 => '/usr/bin/qemu-system-aarch64',
};

# Return the binary used to run a guest of the given architecture:
# '/usr/bin/kvm' when is_native_arch() accepts it, else the matching emulator
# from $Arch2Qemu. Dies if no emulator is known for the architecture.
sub get_command_for_arch($) {
    my ($arch) = @_;

    if (is_native_arch($arch)) {
        return '/usr/bin/kvm';
    }

    my $emulator = $Arch2Qemu->{$arch};
    die "don't know how to emulate architecture '$arch'\n" if !$emulator;

    return $emulator;
}
3323
# Query the CPU flags QEMU reports as supported on this host.
#
# To get flags usable in a QEMU command line (-cpu element) from
# query_supported_cpu_flags and query_understood_cpu_flags, first
# array_intersect the result of query_supported_ with query_understood_.
# This is necessary because:
#
# a) query_understood_ returns flags the host cannot use and
# b) query_supported_ (rather the QMP call) doesn't actually return CPU
#    flags, but CPU settings - with most of them being flags. Those settings
#    (and some flags, curiously) cannot be specified as a "-cpu" argument.
#
# query_supported_ has to start up to 2 temporary VMs, which makes it rather
# expensive. The value it returns can be fetched much cheaper from pmxcfs
# using PVE::Cluster::get_node_kv('cpuflags-$accel') with $accel being 'kvm'
# or 'tcg'.
#
# pvestatd calls this function on startup and whenever the QEMU/KVM version
# changes, automatically populating pmxcfs.
#
# Returns: { kvm => [ flagX, flagY, ... ], tcg => [ flag1, flag2, ... ] }
# since kvm and tcg machines support different flags
#
sub query_supported_cpu_flags {
    my ($arch) = @_;

    $arch = get_host_arch() if !defined($arch);

    # FIXME: Once this is merged, the code below should work for ARM as well:
    # https://lists.nongnu.org/archive/html/qemu-devel/2019-06/msg04947.html
    if ($arch eq "aarch64") {
        die "QEMU/KVM cannot detect CPU flags on ARM (aarch64)\n";
    }

    my $machine = $default_machines->{$arch};
    my $have_kvm = defined(kvm_version());
    my $binary = get_command_for_arch($arch);
    my $fakevmid = -1;
    my $pidfile = PVE::QemuServer::Helpers::pidfile_name($fakevmid);

    # Start a temporary (frozen) VM with vmid -1 to allow sending a QMP command
    my $run_qemu_and_query = sub {
        my ($kvm) = @_;

        my @qemu = (
            $binary,
            '-machine', $machine,
            '-display', 'none',
            '-chardev', "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server=on,wait=off",
            '-mon', 'chardev=qmp,mode=control',
            '-pidfile', $pidfile,
            '-S', '-daemonize',
        );
        push @qemu, '-accel', 'tcg' if !$kvm;

        my $rc = run_command(\@qemu, noerr => 1, quiet => 0);
        die "QEMU flag querying VM exited with code " . $rc if $rc;

        my %seen;
        eval {
            my $expansion = mon_cmd(
                $fakevmid,
                'query-cpu-model-expansion',
                type => 'full',
                model => { name => 'host' }
            );

            my $props = $expansion->{model}->{props};
            for my $prop (keys %$props) {
                next if $props->{$prop} ne '1';
                # QEMU returns some flags multiple times, with '_', '.' or '-'
                # (e.g. lahf_lm and lahf-lm; sse4.2, sse4-2 and sse4_2; ...).
                # We only keep those with underscores, to match /proc/cpuinfo
                my $name = $prop =~ s/\.|-/_/gr;
                $seen{$name} = 1;
            }
        };
        my $err = $@;

        # force stop with 10 sec timeout and 'nocheck', always stop, even if QMP failed
        vm_stop(undef, $fakevmid, 1, 1, 10, 0, 1);

        die $err if $err;

        return [ sort keys %seen ];
    };

    my $supported = {};

    # We need to query QEMU twice, since KVM and TCG have different supported flags
    PVE::QemuConfig->lock_config($fakevmid, sub {
        $supported->{tcg} = eval { $run_qemu_and_query->(0) };
        warn "warning: failed querying supported tcg flags: $@\n" if $@;

        if ($have_kvm) {
            $supported->{kvm} = eval { $run_qemu_and_query->(1) };
            warn "warning: failed querying supported kvm flags: $@\n" if $@;
        }
    });

    return $supported;
}
3425
# Understood CPU flags are written to a file at 'pve-qemu' compile time
my $understood_cpu_flag_dir = "/usr/share/kvm";

# Read the list of CPU flags QEMU understands for the host architecture.
#
# Returns an array reference with the whitespace separated entries of the
# 'recognized-CPUID-flags-<arch>' file; dies if that file does not exist.
sub query_understood_cpu_flags {
    my $arch = get_host_arch();
    my $filepath = "$understood_cpu_flag_dir/recognized-CPUID-flags-$arch";

    if (! -e $filepath) {
        die "Cannot query understood QEMU CPU flags for architecture: $arch (file not found)\n";
    }

    my $raw = file_get_contents($filepath);

    # strip surrounding whitespace first so split() yields no empty leading field
    $raw =~ s/^\s+//;
    $raw =~ s/\s+$//;

    return [ split(/\s+/, $raw) ];
}
3441
# Since commit 277d33454f77ec1d1e0bc04e37621e4dd2424b67 in pve-qemu, smm is not off by default
# anymore. But smm=off seems to be required when using SeaBIOS and serial display.
#
# Returns a true value if the BIOS is SeaBIOS (explicitly, or because none is
# configured) and the VGA type is a serial terminal or 'none'. Returns nothing
# for machine types starting with 'virt'.
my sub should_disable_smm {
    my ($conf, $vga, $machine) = @_;

    return if $machine =~ m/^virt/; # there is no smm flag that could be disabled

    my $bios = $conf->{bios};
    my $uses_seabios = !defined($bios) || $bios eq 'seabios';

    return $uses_seabios && $vga->{type} && $vga->{type} =~ m/^(serial\d+|none)$/;
}
3452
# Build the two '-drive' option strings needed to boot a VM with OVMF.
#
# $conf          - VM configuration (efidisk0 is read from it)
# $storecfg      - storage configuration
# $vmid          - ID of the VM
# $arch          - guest architecture
# $q35           - passed to get_ovmf_files() as its $smm argument
# $version_guard - code reference used to check the machine version
#
# Returns the list ($code_drive_str, $var_drive_str): the read-only firmware
# code drive and the EFI vars drive. Without a configured efidisk0, the vars
# template is copied to a temporary file which is used instead.
my sub print_ovmf_drive_commandlines {
    my ($conf, $storecfg, $vmid, $arch, $q35, $version_guard) = @_;

    my $d;
    $d = parse_drive('efidisk0', $conf->{efidisk0}) if $conf->{efidisk0};

    my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch, $d, $q35);

    # options of the vars drive, joined with ',' at the end
    my @var_opts = ('if=pflash', 'unit=1', 'id=drive-efidisk0');

    if ($d) {
        my ($storeid, $volname) = PVE::Storage::parse_volume_id($d->{file}, 1);
        my $path = $d->{file};
        my $format = $d->{format};

        if ($storeid) {
            $path = PVE::Storage::path($storecfg, $d->{file});
            if (!defined($format)) {
                my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
                $format = qemu_img_format($scfg, $volname);
            }
        } else {
            die "efidisk format must be specified\n" if !defined($format);
        }

        # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
        if ($path =~ m/^rbd:/) {
            push @var_opts, 'cache=writeback';
            $path .= ':rbd_cache_policy=writeback'; # avoid write-around, we *need* to cache writes too
        }
        push @var_opts, "format=$format", "file=$path";

        push @var_opts, "size=" . (-s $ovmf_vars) if $format eq 'raw' && $version_guard->(4, 1, 2);
        push @var_opts, 'readonly=on' if drive_is_read_only($conf, $d);
    } else {
        log_warn("no efidisk configured! Using temporary efivars disk.");
        my $path = "/tmp/$vmid-ovmf.fd";
        PVE::Tools::file_copy($ovmf_vars, $path, -s $ovmf_vars);
        push @var_opts, "format=raw", "file=$path";
        push @var_opts, "size=" . (-s $ovmf_vars) if $version_guard->(4, 1, 2);
    }

    my $code_drive_str = "if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code";

    return ($code_drive_str, join(',', @var_opts));
}
3492
3493 sub config_to_command {
3494 my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu,
3495 $live_restore_backing) = @_;
3496
3497 my ($globalFlags, $machineFlags, $rtcFlags) = ([], [], []);
3498 my $devices = [];
3499 my $bridges = {};
3500 my $ostype = $conf->{ostype};
3501 my $winversion = windows_version($ostype);
3502 my $kvm = $conf->{kvm};
3503 my $nodename = nodename();
3504
3505 my $machine_conf = PVE::QemuServer::Machine::parse_machine($conf->{machine});
3506
3507 my $arch = get_vm_arch($conf);
3508 my $kvm_binary = get_command_for_arch($arch);
3509 my $kvmver = kvm_user_version($kvm_binary);
3510
3511 if (!$kvmver || $kvmver !~ m/^(\d+)\.(\d+)/ || $1 < 3) {
3512 $kvmver //= "undefined";
3513 die "Detected old QEMU binary ('$kvmver', at least 3.0 is required)\n";
3514 }
3515
3516 my $add_pve_version = min_version($kvmver, 4, 1);
3517
3518 my $machine_type = get_vm_machine($conf, $forcemachine, $arch, $add_pve_version);
3519 my $machine_version = extract_version($machine_type, $kvmver);
3520 $kvm //= 1 if is_native_arch($arch);
3521
3522 $machine_version =~ m/(\d+)\.(\d+)/;
3523 my ($machine_major, $machine_minor) = ($1, $2);
3524
3525 if ($kvmver =~ m/^\d+\.\d+\.(\d+)/ && $1 >= 90) {
3526 warn "warning: Installed QEMU version ($kvmver) is a release candidate, ignoring version checks\n";
3527 } elsif (!min_version($kvmver, $machine_major, $machine_minor)) {
3528 die "Installed QEMU version '$kvmver' is too old to run machine type '$machine_type',"
3529 ." please upgrade node '$nodename'\n"
3530 } elsif (!PVE::QemuServer::Machine::can_run_pve_machine_version($machine_version, $kvmver)) {
3531 my $max_pve_version = PVE::QemuServer::Machine::get_pve_version($machine_version);
3532 die "Installed qemu-server (max feature level for $machine_major.$machine_minor is"
3533 ." pve$max_pve_version) is too old to run machine type '$machine_type', please upgrade"
3534 ." node '$nodename'\n";
3535 }
3536
3537 # if a specific +pve version is required for a feature, use $version_guard
3538 # instead of min_version to allow machines to be run with the minimum
3539 # required version
3540 my $required_pve_version = 0;
3541 my $version_guard = sub {
3542 my ($major, $minor, $pve) = @_;
3543 return 0 if !min_version($machine_version, $major, $minor, $pve);
3544 my $max_pve = PVE::QemuServer::Machine::get_pve_version("$major.$minor");
3545 return 1 if min_version($machine_version, $major, $minor, $max_pve+1);
3546 $required_pve_version = $pve if $pve && $pve > $required_pve_version;
3547 return 1;
3548 };
3549
3550 if ($kvm && !defined kvm_version()) {
3551 die "KVM virtualisation configured, but not available. Either disable in VM configuration"
3552 ." or enable in BIOS.\n";
3553 }
3554
3555 my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
3556 my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
3557 my $use_old_bios_files = undef;
3558 ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);
3559
3560 my $cmd = [];
3561 if ($conf->{affinity}) {
3562 push @$cmd, '/usr/bin/taskset', '--cpu-list', '--all-tasks', $conf->{affinity};
3563 }
3564
3565 push @$cmd, $kvm_binary;
3566
3567 push @$cmd, '-id', $vmid;
3568
3569 my $vmname = $conf->{name} || "vm$vmid";
3570
3571 push @$cmd, '-name', "$vmname,debug-threads=on";
3572
3573 push @$cmd, '-no-shutdown';
3574
3575 my $use_virtio = 0;
3576
3577 my $qmpsocket = PVE::QemuServer::Helpers::qmp_socket($vmid);
3578 push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server=on,wait=off";
3579 push @$cmd, '-mon', "chardev=qmp,mode=control";
3580
3581 if (min_version($machine_version, 2, 12)) {
3582 push @$cmd, '-chardev', "socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5";
3583 push @$cmd, '-mon', "chardev=qmp-event,mode=control";
3584 }
3585
3586 push @$cmd, '-pidfile' , PVE::QemuServer::Helpers::pidfile_name($vmid);
3587
3588 push @$cmd, '-daemonize';
3589
3590 if ($conf->{smbios1}) {
3591 my $smbios_conf = parse_smbios1($conf->{smbios1});
3592 if ($smbios_conf->{base64}) {
3593 # Do not pass base64 flag to qemu
3594 delete $smbios_conf->{base64};
3595 my $smbios_string = "";
3596 foreach my $key (keys %$smbios_conf) {
3597 my $value;
3598 if ($key eq "uuid") {
3599 $value = $smbios_conf->{uuid}
3600 } else {
3601 $value = decode_base64($smbios_conf->{$key});
3602 }
3603 # qemu accepts any binary data, only commas need escaping by double comma
3604 $value =~ s/,/,,/g;
3605 $smbios_string .= "," . $key . "=" . $value if $value;
3606 }
3607 push @$cmd, '-smbios', "type=1" . $smbios_string;
3608 } else {
3609 push @$cmd, '-smbios', "type=1,$conf->{smbios1}";
3610 }
3611 }
3612
3613 if ($conf->{bios} && $conf->{bios} eq 'ovmf') {
3614 die "OVMF (UEFI) BIOS is not supported on 32-bit CPU types\n"
3615 if !$forcecpu && get_cpu_bitness($conf->{cpu}, $arch) == 32;
3616
3617 my ($code_drive_str, $var_drive_str) =
3618 print_ovmf_drive_commandlines($conf, $storecfg, $vmid, $arch, $q35, $version_guard);
3619 push $cmd->@*, '-drive', $code_drive_str;
3620 push $cmd->@*, '-drive', $var_drive_str;
3621 }
3622
3623 if ($q35) { # tell QEMU to load q35 config early
3624 # we use different pcie-port hardware for qemu >= 4.0 for passthrough
3625 if (min_version($machine_version, 4, 0)) {
3626 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35-4.0.cfg';
3627 } else {
3628 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35.cfg';
3629 }
3630 }
3631
3632 if (defined(my $fixups = qemu_created_version_fixups($conf, $forcemachine, $kvmver))) {
3633 push @$cmd, $fixups->@*;
3634 }
3635
3636 if ($conf->{vmgenid}) {
3637 push @$devices, '-device', 'vmgenid,guid='.$conf->{vmgenid};
3638 }
3639
3640 # add usb controllers
3641 my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers(
3642 $conf, $bridges, $arch, $machine_type, $machine_version);
3643 push @$devices, @usbcontrollers if @usbcontrollers;
3644 my $vga = parse_vga($conf->{vga});
3645
3646 my $qxlnum = vga_conf_has_spice($conf->{vga});
3647 $vga->{type} = 'qxl' if $qxlnum;
3648
3649 if (!$vga->{type}) {
3650 if ($arch eq 'aarch64') {
3651 $vga->{type} = 'virtio';
3652 } elsif (min_version($machine_version, 2, 9)) {
3653 $vga->{type} = (!$winversion || $winversion >= 6) ? 'std' : 'cirrus';
3654 } else {
3655 $vga->{type} = ($winversion >= 6) ? 'std' : 'cirrus';
3656 }
3657 }
3658
3659 # enable absolute mouse coordinates (needed by vnc)
3660 my $tablet = $conf->{tablet};
3661 if (!defined($tablet)) {
3662 $tablet = $defaults->{tablet};
3663 $tablet = 0 if $qxlnum; # disable for spice because it is not needed
3664 $tablet = 0 if $vga->{type} =~ m/^serial\d+$/; # disable if we use serial terminal (no vga card)
3665 }
3666
3667 if ($tablet) {
3668 push @$devices, '-device', print_tabletdevice_full($conf, $arch) if $tablet;
3669 my $kbd = print_keyboarddevice_full($conf, $arch);
3670 push @$devices, '-device', $kbd if defined($kbd);
3671 }
3672
3673 my $bootorder = device_bootorder($conf);
3674
3675 # host pci device passthrough
3676 my ($kvm_off, $gpu_passthrough, $legacy_igd, $pci_devices) = PVE::QemuServer::PCI::print_hostpci_devices(
3677 $vmid, $conf, $devices, $vga, $winversion, $bridges, $arch, $machine_type, $bootorder);
3678
3679 # usb devices
3680 my $usb_dev_features = {};
3681 $usb_dev_features->{spice_usb3} = 1 if min_version($machine_version, 4, 0);
3682
3683 my @usbdevices = PVE::QemuServer::USB::get_usb_devices(
3684 $conf, $usb_dev_features, $bootorder, $machine_version);
3685 push @$devices, @usbdevices if @usbdevices;
3686
3687 # serial devices
3688 for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
3689 my $path = $conf->{"serial$i"} or next;
3690 if ($path eq 'socket') {
3691 my $socket = "/var/run/qemu-server/${vmid}.serial$i";
3692 push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server=on,wait=off";
3693 # On aarch64, serial0 is the UART device. QEMU only allows
3694 # connecting UART devices via the '-serial' command line, as
3695 # the device has a fixed slot on the hardware...
3696 if ($arch eq 'aarch64' && $i == 0) {
3697 push @$devices, '-serial', "chardev:serial$i";
3698 } else {
3699 push @$devices, '-device', "isa-serial,chardev=serial$i";
3700 }
3701 } else {
3702 die "no such serial device\n" if ! -c $path;
3703 push @$devices, '-chardev', "serial,id=serial$i,path=$path";
3704 push @$devices, '-device', "isa-serial,chardev=serial$i";
3705 }
3706 }
3707
3708 # parallel devices
3709 for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
3710 if (my $path = $conf->{"parallel$i"}) {
3711 die "no such parallel device\n" if ! -c $path;
3712 my $devtype = $path =~ m!^/dev/usb/lp! ? 'serial' : 'parallel';
3713 push @$devices, '-chardev', "$devtype,id=parallel$i,path=$path";
3714 push @$devices, '-device', "isa-parallel,chardev=parallel$i";
3715 }
3716 }
3717
3718 if (min_version($machine_version, 4, 0) && (my $audio = conf_has_audio($conf))) {
3719 my $audiopciaddr = print_pci_addr("audio0", $bridges, $arch, $machine_type);
3720 my $audio_devs = audio_devs($audio, $audiopciaddr, $machine_version);
3721 push @$devices, @$audio_devs;
3722 }
3723
3724 # Add a TPM only if the VM is not a template,
3725 # to support backing up template VMs even if the TPM disk is write-protected.
3726 add_tpm_device($vmid, $devices, $conf) if (!PVE::QemuConfig->is_template($conf));
3727
3728 my $sockets = 1;
3729 $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused
3730 $sockets = $conf->{sockets} if $conf->{sockets};
3731
3732 my $cores = $conf->{cores} || 1;
3733
3734 my $maxcpus = $sockets * $cores;
3735
3736 my $vcpus = $conf->{vcpus} ? $conf->{vcpus} : $maxcpus;
3737
3738 my $allowed_vcpus = $cpuinfo->{cpus};
3739
3740 die "MAX $allowed_vcpus vcpus allowed per VM on this node\n" if ($allowed_vcpus < $maxcpus);
3741
3742 if ($hotplug_features->{cpu} && min_version($machine_version, 2, 7)) {
3743 push @$cmd, '-smp', "1,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3744 for (my $i = 2; $i <= $vcpus; $i++) {
3745 my $cpustr = print_cpu_device($conf, $arch, $i);
3746 push @$cmd, '-device', $cpustr;
3747 }
3748
3749 } else {
3750
3751 push @$cmd, '-smp', "$vcpus,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3752 }
3753 push @$cmd, '-nodefaults';
3754
3755 push @$cmd, '-boot', "menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg";
3756
3757 push $machineFlags->@*, 'acpi=off' if defined($conf->{acpi}) && $conf->{acpi} == 0;
3758
3759 push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;
3760
3761 if ($vga->{type} && $vga->{type} !~ m/^serial\d+$/ && $vga->{type} ne 'none'){
3762 push @$devices, '-device', print_vga_device(
3763 $conf, $vga, $arch, $machine_version, $machine_type, undef, $qxlnum, $bridges);
3764
3765 push @$cmd, '-display', 'egl-headless,gl=core' if $vga->{type} eq 'virtio-gl'; # VIRGL
3766
3767 my $socket = PVE::QemuServer::Helpers::vnc_socket($vmid);
3768 push @$cmd, '-vnc', "unix:$socket,password=on";
3769 } else {
3770 push @$cmd, '-vga', 'none' if $vga->{type} eq 'none';
3771 push @$cmd, '-nographic';
3772 }
3773
3774 # time drift fix
3775 my $tdf = defined($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};
3776 my $useLocaltime = $conf->{localtime};
3777
3778 if ($winversion >= 5) { # windows
3779 $useLocaltime = 1 if !defined($conf->{localtime});
3780
3781 # use time drift fix when acpi is enabled
3782 if (!(defined($conf->{acpi}) && $conf->{acpi} == 0)) {
3783 $tdf = 1 if !defined($conf->{tdf});
3784 }
3785 }
3786
3787 if ($winversion >= 6) {
3788 push @$globalFlags, 'kvm-pit.lost_tick_policy=discard';
3789 push @$machineFlags, 'hpet=off';
3790 }
3791
3792 push @$rtcFlags, 'driftfix=slew' if $tdf;
3793
3794 if ($conf->{startdate} && $conf->{startdate} ne 'now') {
3795 push @$rtcFlags, "base=$conf->{startdate}";
3796 } elsif ($useLocaltime) {
3797 push @$rtcFlags, 'base=localtime';
3798 }
3799
3800 if ($forcecpu) {
3801 push @$cmd, '-cpu', $forcecpu;
3802 } else {
3803 push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
3804 }
3805
3806 PVE::QemuServer::Memory::config(
3807 $conf, $vmid, $sockets, $cores, $hotplug_features->{memory}, $cmd);
3808
3809 push @$cmd, '-S' if $conf->{freeze};
3810
3811 push @$cmd, '-k', $conf->{keyboard} if defined($conf->{keyboard});
3812
3813 my $guest_agent = parse_guest_agent($conf);
3814
3815 if ($guest_agent->{enabled}) {
3816 my $qgasocket = PVE::QemuServer::Helpers::qmp_socket($vmid, 1);
3817 push @$devices, '-chardev', "socket,path=$qgasocket,server=on,wait=off,id=qga0";
3818
3819 if (!$guest_agent->{type} || $guest_agent->{type} eq 'virtio') {
3820 my $pciaddr = print_pci_addr("qga0", $bridges, $arch, $machine_type);
3821 push @$devices, '-device', "virtio-serial,id=qga0$pciaddr";
3822 push @$devices, '-device', 'virtserialport,chardev=qga0,name=org.qemu.guest_agent.0';
3823 } elsif ($guest_agent->{type} eq 'isa') {
3824 push @$devices, '-device', "isa-serial,chardev=qga0";
3825 }
3826 }
3827
3828 my $rng = $conf->{rng0} ? parse_rng($conf->{rng0}) : undef;
3829 if ($rng && $version_guard->(4, 1, 2)) {
3830 check_rng_source($rng->{source});
3831
3832 my $max_bytes = $rng->{max_bytes} // $rng_fmt->{max_bytes}->{default};
3833 my $period = $rng->{period} // $rng_fmt->{period}->{default};
3834 my $limiter_str = "";
3835 if ($max_bytes) {
3836 $limiter_str = ",max-bytes=$max_bytes,period=$period";
3837 }
3838
3839 my $rng_addr = print_pci_addr("rng0", $bridges, $arch, $machine_type);
3840 push @$devices, '-object', "rng-random,filename=$rng->{source},id=rng0";
3841 push @$devices, '-device', "virtio-rng-pci,rng=rng0$limiter_str$rng_addr";
3842 }
3843
3844 my $spice_port;
3845
3846 assert_clipboard_config($vga);
3847 my $is_spice = $qxlnum || $vga->{type} =~ /^virtio/;
3848
3849 if ($is_spice || ($vga->{'clipboard'} && $vga->{'clipboard'} eq 'vnc')) {
3850 if ($qxlnum > 1) {
3851 if ($winversion){
3852 for (my $i = 1; $i < $qxlnum; $i++){
3853 push @$devices, '-device', print_vga_device(
3854 $conf, $vga, $arch, $machine_version, $machine_type, $i, $qxlnum, $bridges);
3855 }
3856 } else {
3857 # assume other OS works like Linux
3858 my ($ram, $vram) = ("134217728", "67108864");
3859 if ($vga->{memory}) {
3860 $ram = PVE::Tools::convert_size($qxlnum*4*$vga->{memory}, 'mb' => 'b');
3861 $vram = PVE::Tools::convert_size($qxlnum*2*$vga->{memory}, 'mb' => 'b');
3862 }
3863 push @$cmd, '-global', "qxl-vga.ram_size=$ram";
3864 push @$cmd, '-global', "qxl-vga.vram_size=$vram";
3865 }
3866 }
3867
3868 my $pciaddr = print_pci_addr("spice", $bridges, $arch, $machine_type);
3869
3870 push @$devices, '-device', "virtio-serial,id=spice$pciaddr";
3871 if ($vga->{'clipboard'} && $vga->{'clipboard'} eq 'vnc') {
3872 push @$devices, '-chardev', 'qemu-vdagent,id=vdagent,name=vdagent,clipboard=on';
3873 } else {
3874 push @$devices, '-chardev', 'spicevmc,id=vdagent,name=vdagent';
3875 }
3876 push @$devices, '-device', "virtserialport,chardev=vdagent,name=com.redhat.spice.0";
3877
3878 if ($is_spice) {
3879 my $pfamily = PVE::Tools::get_host_address_family($nodename);
3880 my @nodeaddrs = PVE::Tools::getaddrinfo_all('localhost', family => $pfamily);
3881 die "failed to get an ip address of type $pfamily for 'localhost'\n" if !@nodeaddrs;
3882
3883 my $localhost = PVE::Network::addr_to_ip($nodeaddrs[0]->{addr});
3884 $spice_port = PVE::Tools::next_spice_port($pfamily, $localhost);
3885
3886 my $spice_enhancement_str = $conf->{spice_enhancements} // '';
3887 my $spice_enhancement = parse_property_string($spice_enhancements_fmt, $spice_enhancement_str);
3888 if ($spice_enhancement->{foldersharing}) {
3889 push @$devices, '-chardev', "spiceport,id=foldershare,name=org.spice-space.webdav.0";
3890 push @$devices, '-device', "virtserialport,chardev=foldershare,name=org.spice-space.webdav.0";
3891 }
3892
3893 my $spice_opts = "tls-port=${spice_port},addr=$localhost,tls-ciphers=HIGH,seamless-migration=on";
3894 $spice_opts .= ",streaming-video=$spice_enhancement->{videostreaming}"
3895 if $spice_enhancement->{videostreaming};
3896 push @$devices, '-spice', "$spice_opts";
3897 }
3898 }
3899
3900 # enable balloon by default, unless explicitly disabled
3901 if (!defined($conf->{balloon}) || $conf->{balloon}) {
3902 my $pciaddr = print_pci_addr("balloon0", $bridges, $arch, $machine_type);
3903 my $ballooncmd = "virtio-balloon-pci,id=balloon0$pciaddr";
3904 $ballooncmd .= ",free-page-reporting=on" if min_version($machine_version, 6, 2);
3905 push @$devices, '-device', $ballooncmd;
3906 }
3907
3908 if ($conf->{watchdog}) {
3909 my $wdopts = parse_watchdog($conf->{watchdog});
3910 my $pciaddr = print_pci_addr("watchdog", $bridges, $arch, $machine_type);
3911 my $watchdog = $wdopts->{model} || 'i6300esb';
3912 push @$devices, '-device', "$watchdog$pciaddr";
3913 push @$devices, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
3914 }
3915
3916 my $vollist = [];
3917 my $scsicontroller = {};
3918 my $ahcicontroller = {};
3919 my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : $defaults->{scsihw};
3920
3921 # Add iscsi initiator name if available
3922 if (my $initiator = get_initiator_name()) {
3923 push @$devices, '-iscsi', "initiator-name=$initiator";
3924 }
3925
3926 PVE::QemuConfig->foreach_volume($conf, sub {
3927 my ($ds, $drive) = @_;
3928
3929 if (PVE::Storage::parse_volume_id($drive->{file}, 1)) {
3930 check_volume_storage_type($storecfg, $drive->{file});
3931 push @$vollist, $drive->{file};
3932 }
3933
3934 # ignore efidisk here, already added in bios/fw handling code above
3935 return if $drive->{interface} eq 'efidisk';
3936 # similar for TPM
3937 return if $drive->{interface} eq 'tpmstate';
3938
3939 $use_virtio = 1 if $ds =~ m/^virtio/;
3940
3941 $drive->{bootindex} = $bootorder->{$ds} if $bootorder->{$ds};
3942
3943 if ($drive->{interface} eq 'virtio'){
3944 push @$cmd, '-object', "iothread,id=iothread-$ds" if $drive->{iothread};
3945 }
3946
3947 if ($drive->{interface} eq 'scsi') {
3948
3949 my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
3950
3951 die "scsi$drive->{index}: machine version 4.1~pve2 or higher is required to use more than 14 SCSI disks\n"
3952 if $drive->{index} > 13 && !&$version_guard(4, 1, 2);
3953
3954 my $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges, $arch, $machine_type);
3955 my $scsihw_type = $scsihw =~ m/^virtio-scsi-single/ ? "virtio-scsi-pci" : $scsihw;
3956
3957 my $iothread = '';
3958 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{iothread}){
3959 $iothread .= ",iothread=iothread-$controller_prefix$controller";
3960 push @$cmd, '-object', "iothread,id=iothread-$controller_prefix$controller";
3961 } elsif ($drive->{iothread}) {
3962 log_warn(
3963 "iothread is only valid with virtio disk or virtio-scsi-single controller, ignoring\n"
3964 );
3965 }
3966
3967 my $queues = '';
3968 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{queues}){
3969 $queues = ",num_queues=$drive->{queues}";
3970 }
3971
3972 push @$devices, '-device', "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues"
3973 if !$scsicontroller->{$controller};
3974 $scsicontroller->{$controller}=1;
3975 }
3976
3977 if ($drive->{interface} eq 'sata') {
3978 my $controller = int($drive->{index} / $PVE::QemuServer::Drive::MAX_SATA_DISKS);
3979 my $pciaddr = print_pci_addr("ahci$controller", $bridges, $arch, $machine_type);
3980 push @$devices, '-device', "ahci,id=ahci$controller,multifunction=on$pciaddr"
3981 if !$ahcicontroller->{$controller};
3982 $ahcicontroller->{$controller}=1;
3983 }
3984
3985 my $live_restore = $live_restore_backing->{$ds};
3986 my $live_blockdev_name = undef;
3987 if ($live_restore) {
3988 $live_blockdev_name = $live_restore->{name};
3989 push @$devices, '-blockdev', $live_restore->{blockdev};
3990 }
3991
3992 my $drive_cmd = print_drive_commandline_full(
3993 $storecfg, $vmid, $drive, $live_blockdev_name, min_version($kvmver, 6, 0));
3994
3995 # extra protection for templates, but SATA and IDE don't support it..
3996 $drive_cmd .= ',readonly=on' if drive_is_read_only($conf, $drive);
3997
3998 push @$devices, '-drive',$drive_cmd;
3999 push @$devices, '-device', print_drivedevice_full(
4000 $storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type);
4001 });
4002
4003 for (my $i = 0; $i < $MAX_NETS; $i++) {
4004 my $netname = "net$i";
4005
4006 next if !$conf->{$netname};
4007 my $d = parse_net($conf->{$netname});
4008 next if !$d;
4009 # save the MAC addr here (could be auto-gen. in some odd setups) for FDB registering later?
4010
4011 $use_virtio = 1 if $d->{model} eq 'virtio';
4012
4013 $d->{bootindex} = $bootorder->{$netname} if $bootorder->{$netname};
4014
4015 my $netdevfull = print_netdev_full($vmid, $conf, $arch, $d, $netname);
4016 push @$devices, '-netdev', $netdevfull;
4017
4018 my $netdevicefull = print_netdevice_full(
4019 $vmid, $conf, $d, $netname, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version);
4020
4021 push @$devices, '-device', $netdevicefull;
4022 }
4023
4024 if ($conf->{ivshmem}) {
4025 my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
4026
4027 my $bus;
4028 if ($q35) {
4029 $bus = print_pcie_addr("ivshmem");
4030 } else {
4031 $bus = print_pci_addr("ivshmem", $bridges, $arch, $machine_type);
4032 }
4033
4034 my $ivshmem_name = $ivshmem->{name} // $vmid;
4035 my $path = '/dev/shm/pve-shm-' . $ivshmem_name;
4036
4037 push @$devices, '-device', "ivshmem-plain,memdev=ivshmem$bus,";
4038 push @$devices, '-object', "memory-backend-file,id=ivshmem,share=on,mem-path=$path"
4039 .",size=$ivshmem->{size}M";
4040 }
4041
4042 # pci.4 is nested in pci.1
4043 $bridges->{1} = 1 if $bridges->{4};
4044
4045 if (!$q35) { # add pci bridges
4046 if (min_version($machine_version, 2, 3)) {
4047 $bridges->{1} = 1;
4048 $bridges->{2} = 1;
4049 }
4050 $bridges->{3} = 1 if $scsihw =~ m/^virtio-scsi-single/;
4051 }
4052
4053 for my $k (sort {$b cmp $a} keys %$bridges) {
4054 next if $q35 && $k < 4; # q35.cfg already includes bridges up to 3
4055
4056 my $k_name = $k;
4057 if ($k == 2 && $legacy_igd) {
4058 $k_name = "$k-igd";
4059 }
4060 my $pciaddr = print_pci_addr("pci.$k_name", undef, $arch, $machine_type);
4061 my $devstr = "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr";
4062
4063 if ($q35) { # add after -readconfig pve-q35.cfg
4064 splice @$devices, 2, 0, '-device', $devstr;
4065 } else {
4066 unshift @$devices, '-device', $devstr if $k > 0;
4067 }
4068 }
4069
4070 if (!$kvm) {
4071 push @$machineFlags, 'accel=tcg';
4072 }
4073
4074 push @$machineFlags, 'smm=off' if should_disable_smm($conf, $vga, $machine_type);
4075
4076 my $machine_type_min = $machine_type;
4077 if ($add_pve_version) {
4078 $machine_type_min =~ s/\+pve\d+$//;
4079 $machine_type_min .= "+pve$required_pve_version";
4080 }
4081 push @$machineFlags, "type=${machine_type_min}";
4082
4083 push @$cmd, @$devices;
4084 push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
4085 push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
4086 push @$cmd, '-global', join(',', @$globalFlags) if scalar(@$globalFlags);
4087
4088 if (my $vmstate = $conf->{vmstate}) {
4089 my $statepath = PVE::Storage::path($storecfg, $vmstate);
4090 push @$vollist, $vmstate;
4091 push @$cmd, '-loadstate', $statepath;
4092 print "activating and using '$vmstate' as vmstate\n";
4093 }
4094
4095 if (PVE::QemuConfig->is_template($conf)) {
4096 # needed to workaround base volumes being read-only
4097 push @$cmd, '-snapshot';
4098 }
4099
4100 # add custom args
4101 if ($conf->{args}) {
4102 my $aa = PVE::Tools::split_args($conf->{args});
4103 push @$cmd, @$aa;
4104 }
4105
4106 return wantarray ? ($cmd, $vollist, $spice_port, $pci_devices) : $cmd;
4107 }
4108
# Sanity-check the host-side source file of a VirtIO RNG device.
#
# Dies if $source does not exist, or if $source is '/dev/hwrng' while the kernel reports 'none'
# as the current hardware RNG.
sub check_rng_source {
    my ($source) = @_;

    # Primarily matters for /dev/hwrng, but checking any other source is harmless.
    if (!-e $source) {
        die "cannot create VirtIO RNG device: source file '$source' doesn't exist\n";
    }

    my $rng_current = '/sys/devices/virtual/misc/hw_random/rng_current';
    my $hwrng_without_backend = $source eq '/dev/hwrng'
        && file_read_firstline($rng_current) eq 'none';

    # Must abort here, otherwise QEMU crashes on the first rng access. rng_current cannot be
    # switched to 'none' by hand, so once the VM got past this check it is no longer a concern.
    die "Cannot start VM with passed-through RNG device: '/dev/hwrng' exists, but"
        ." '$rng_current' is set to 'none'. Ensure that a compatible hardware-RNG is attached"
        ." to the host.\n"
        if $hwrng_without_backend;
}
4125
# Ask the running VM for its SPICE port via 'query-spice'.
#
# Returns the TLS port if one is set, else the plain port; dies if neither is available.
sub spice_port {
    my ($vmid) = @_;

    my $spice_info = mon_cmd($vmid, 'query-spice');

    my $port = $spice_info->{'tls-port'} || $spice_info->{'port'};
    die "no spice port\n" if !$port;

    return $port;
}
4133
# Collect the IDs of devices currently present in the running VM.
#
# Sources queried through the monitor:
#   - 'query-pci':   qdev IDs of all PCI devices, including those nested behind bridges
#   - 'query-block': block devices named 'drive-<id>' are recorded as '<id>'
#   - 'query-mice':  a 'QEMU HID Tablet' entry is recorded as 'tablet'
#   - 'qom-list':    usbN / usbredirdevN entries below /machine/peripheral
#
# Returns a hash reference keyed by device ID. Values are 1, except for PCI bridges, whose value
# is additionally increased by the number of devices attached to them.
sub vm_devices_list {
    my ($vmid) = @_;

    my $devices = {};

    # seed the walk with the devices that sit directly on each PCI bus
    my $pci_buses = mon_cmd($vmid, 'query-pci');
    my @pending;
    for my $bus (@$pci_buses) {
        push @pending, @{$bus->{devices}};
    }

    # descend level by level through nested PCI bridges
    while (@pending) {
        my @children;
        for my $dev (@pending) {
            $devices->{$dev->{'qdev_id'}} = 1 if $dev->{'qdev_id'};

            my $bridge = $dev->{'pci_bridge'};
            next if !$bridge || !$bridge->{devices};

            $devices->{$dev->{'qdev_id'}} += scalar(@{$bridge->{devices}});
            push @children, @{$bridge->{devices}};
        }
        @pending = @children;
    }

    my $block_devices = mon_cmd($vmid, 'query-block');
    for my $block (@$block_devices) {
        next if $block->{device} !~ m/^drive-(\S+)/;
        $devices->{$1} = 1;
    }

    my $pointer_devices = mon_cmd($vmid, 'query-mice');
    for my $pointer (@$pointer_devices) {
        next if $pointer->{name} ne 'QEMU HID Tablet';
        $devices->{tablet} = 1;
        last;
    }

    # there is no 'query-usb' for USB devices, but they show up as entries of
    # 'qom-list path=/machine/peripheral'
    my $peripherals = mon_cmd($vmid, 'qom-list', path => '/machine/peripheral');
    for my $peripheral (@$peripherals) {
        my $name = $peripheral->{name};
        $devices->{$name} = 1 if $name =~ m/^usb(?:redirdev)?\d+$/;
    }

    return $devices;
}
4183
# Hot-plug the device $deviceid into the running VM $vmid.
#
# Parameters:
#   $storecfg     - storage configuration, passed on to the drive helpers
#   $conf         - VM configuration
#   $vmid         - ID of the running VM
#   $deviceid     - device to plug: 'tablet', 'keyboard', usbN, usbredirdevN, virtioN, scsiN,
#                   virtioscsiN, scsihwN, netN or (non-q35 only) pci.N
#   $device       - parsed device/drive/net definition; unused by tablet, keyboard and pci.N
#   $arch         - guest architecture
#   $machine_type - machine type, used to compute PCI addresses
#
# Returns 1 if the device is already present or was plugged successfully. Returns empty if
# adding the netdev backend for a netN device reports failure. Dies on errors and for device
# IDs that cannot be hot-plugged.
sub vm_deviceplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    my $devices_list = vm_devices_list($vmid);
    return 1 if defined($devices_list->{$deviceid});

    # add PCI bridge if we need it for the device
    qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid, $arch, $machine_type);

    if ($deviceid eq 'tablet') {
        qemu_deviceadd($vmid, print_tabletdevice_full($conf, $arch));
    } elsif ($deviceid eq 'keyboard') {
        qemu_deviceadd($vmid, print_keyboarddevice_full($conf, $arch));
    } elsif ($deviceid =~ m/^usbredirdev(\d+)$/) {
        my $id = $1;
        qemu_spice_usbredir_chardev_add($vmid, "usbredirchardev$id");
        qemu_deviceadd($vmid, PVE::QemuServer::USB::print_spice_usbdevice($id, "xhci", $id + 1));
    } elsif ($deviceid =~ m/^usb(\d+)$/) {
        qemu_deviceadd($vmid, PVE::QemuServer::USB::print_usbdevice_full($conf, $deviceid, $device, {}, $1 + 1));
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        qemu_iothread_add($vmid, $deviceid, $device);

        qemu_driveadd($storecfg, $vmid, $device);
        my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);

        # The drive backend exists from here on. If adding or verifying the frontend device
        # fails, remove the drive again (same as the scsi branch below) so that no stale
        # 'drive-<id>' stays behind.
        eval {
            qemu_deviceadd($vmid, $devicefull);
            qemu_deviceaddverify($vmid, $deviceid);
        };
        if (my $err = $@) {
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : "lsi";
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $scsihw_type = $scsihw eq 'virtio-scsi-single' ? "virtio-scsi-pci" : $scsihw;

        my $devicefull = "$scsihw_type,id=$deviceid$pciaddr";

        # iothread and queue settings only apply to the per-disk virtioscsiN controllers
        if ($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{iothread}) {
            qemu_iothread_add($vmid, $deviceid, $device);
            $devicefull .= ",iothread=iothread-$deviceid";
        }

        if ($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{queues}) {
            $devicefull .= ",num_queues=$device->{queues}";
        }

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        # make sure the controller for this disk exists before attaching the disk itself
        qemu_findorcreatescsihw($storecfg, $conf, $vmid, $device, $arch, $machine_type);
        qemu_driveadd($storecfg, $vmid, $device);

        my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);
        eval { qemu_deviceadd($vmid, $devicefull); };
        if (my $err = $@) {
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        return if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid);

        # NOTE: intentionally shadows the $machine_type parameter within this branch
        my $machine_type = PVE::QemuServer::Machine::qemu_machine_pxe($vmid, $conf);
        my $machine_version = PVE::QemuServer::Machine::extract_version($machine_type);
        my $use_old_bios_files = undef;
        ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);

        my $netdevicefull = print_netdevice_full(
            $vmid, $conf, $device, $deviceid, undef, $use_old_bios_files, $arch, $machine_type, $machine_version);
        qemu_deviceadd($vmid, $netdevicefull);
        eval {
            qemu_deviceaddverify($vmid, $deviceid);
            qemu_set_link_status($vmid, $deviceid, !$device->{link_down});
        };
        if (my $err = $@) {
            # roll back the netdev backend added above
            eval { qemu_netdevdel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif (!$q35 && $deviceid =~ m/^(pci\.)(\d+)$/) {
        my $bridgeid = $2;
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $devicefull = "pci-bridge,id=pci.$bridgeid,chassis_nr=$bridgeid$pciaddr";

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);
    } else {
        die "can't hotplug device '$deviceid'\n";
    }

    return 1;
}
4280
# fixme: this should raise exceptions on error!
#
# Hot-unplug the device $deviceid from the running VM $vmid.
#
# Returns 1 if the device is not present (nothing to do) or was removed. Dies when asked to
# unplug a boot disk or a device ID that cannot be hot-unplugged.
sub vm_deviceunplug {
    my ($vmid, $conf, $deviceid) = @_;

    my $present = vm_devices_list($vmid);
    return 1 if !defined($present->{$deviceid});

    my $bootdisks = PVE::QemuServer::Drive::get_bootdisks($conf);
    die "can't unplug bootdisk '$deviceid'\n" if grep {$_ eq $deviceid} @$bootdisks;

    # issue the device removal and verify afterwards that the device is gone
    my $remove_and_verify = sub {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
    };

    if ($deviceid eq 'tablet' || $deviceid eq 'keyboard' || $deviceid eq 'xhci') {
        # removed without verification
        qemu_devicedel($vmid, $deviceid);
    } elsif (
        $deviceid =~ m/^usbredirdev\d+$/
        || $deviceid =~ m/^usb\d+$/
        || $deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/
    ) {
        $remove_and_verify->();
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        $remove_and_verify->();
        qemu_drivedel($vmid, $deviceid);
        qemu_iothread_del($vmid, $deviceid, $drive);
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        $remove_and_verify->();
        qemu_drivedel($vmid, $deviceid);
        qemu_deletescsihw($conf, $vmid, $deviceid);

        # with virtio-scsi-single the iothread is named after the per-disk controller
        if ($conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single')) {
            qemu_iothread_del($vmid, "virtioscsi$drive->{index}", $drive);
        }
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        $remove_and_verify->();
        qemu_netdevdel($vmid, $deviceid);
    } else {
        die "can't unplug device '$deviceid'\n";
    }

    return 1;
}
4329
# Add a 'spicevmc' character device of type 'usbredir' with the ID $id to the running VM.
sub qemu_spice_usbredir_chardev_add {
    my ($vmid, $id) = @_;

    my $backend = {
        type => 'spicevmc',
        data => { type => "usbredir" },
    };

    mon_cmd($vmid, "chardev-add", id => $id, backend => $backend);
}
4343
# Create the iothread object 'iothread-$deviceid' in the running VM, provided the device has
# iothread enabled and no such object exists yet.
sub qemu_iothread_add {
    my ($vmid, $deviceid, $device) = @_;

    return if !$device->{iothread};

    my $object_id = "iothread-$deviceid";
    my $existing = vm_iothreads_list($vmid);
    qemu_objectadd($vmid, $object_id, "iothread") if !$existing->{$object_id};
}
4352
# Remove the iothread object 'iothread-$deviceid' from the running VM, provided the device has
# iothread enabled and the object currently exists.
sub qemu_iothread_del {
    my ($vmid, $deviceid, $device) = @_;

    return if !$device->{iothread};

    my $object_id = "iothread-$deviceid";
    my $existing = vm_iothreads_list($vmid);
    qemu_objectdel($vmid, $object_id) if $existing->{$object_id};
}
4361
# Attach the drive backend described by $device to the running VM via the HMP 'drive_add'
# command.
#
# Returns 1 on success, dies with the monitor output otherwise.
sub qemu_driveadd {
    my ($storecfg, $vmid, $device) = @_;

    my $kvmver = get_running_qemu_version($vmid);
    my $io_uring = min_version($kvmver, 6, 0);

    my $drive = print_drive_commandline_full($storecfg, $vmid, $device, undef, $io_uring);
    # double every backslash, the drive string gets embedded in a quoted HMP argument
    my $escaped = $drive =~ s/\\/\\\\/gr;

    my $ret = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_add auto \"$escaped\"");

    # on success QEMU prints "OK"
    die "adding drive failed: $ret\n" if $ret !~ m/OK/s;

    return 1;
}
4376
# Remove the drive backend 'drive-$deviceid' from the running VM via the HMP 'drive_del'
# command.
#
# Returns 1 if the monitor printed nothing or reported the device as not found; dies with the
# monitor output otherwise.
sub qemu_drivedel {
    my ($vmid, $deviceid) = @_;

    my $ret = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_del drive-$deviceid");
    $ret =~ s/^\s+//;

    if ($ret ne "") {
        # NB: a 'not found' error means the drive was auto-deleted already, so it is ignored
        my $already_gone = $ret =~ m/Device \'.*?\' not found/s;
        die "deleting drive $deviceid failed : $ret\n" if !$already_gone;
    }

    return 1;
}
4390
# Poll the device list of the running VM until $deviceid shows up.
#
# Checks up to six times with a one second pause after each unsuccessful check. Returns 1 as
# soon as the device is listed, dies if it never appears.
sub qemu_deviceaddverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if defined($present->{$deviceid});
        sleep 1;
    }

    die "error on hotplug device '$deviceid'\n";
}
4402
4403
# Poll the device list of the running VM until $deviceid has disappeared.
#
# This is required because device_del is asynchronous and its empty return value is not a
# reliable indication of success. Checks up to six times with a one second pause after each
# unsuccessful check. Returns 1 once the device is gone, dies if it is still present.
sub qemu_devicedelverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if !defined($present->{$deviceid});
        sleep 1;
    }

    die "error on hot-unplugging device '$deviceid'\n";
}
4418
# Make sure the SCSI controller serving $device exists in the running VM, hot-plugging it when
# it is not listed yet. Always returns 1 (unless plugging dies).
sub qemu_findorcreatescsihw {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    # only controller number and ID prefix are needed here
    my (undef, $controller, $controller_prefix) = scsihw_infos($conf, $device);
    my $scsihwid = "$controller_prefix$controller";

    my $present = vm_devices_list($vmid);
    return 1 if defined($present->{$scsihwid});

    vm_deviceplug($storecfg, $conf, $vmid, $scsihwid, $device, $arch, $machine_type);

    return 1;
}
4433
# Unplug the SCSI controller that served the drive $opt, if it is no longer needed.
#
# With 'virtio-scsi-single' the per-disk controller 'virtioscsi<index>' is always unplugged.
# Otherwise the shared controller 'scsihw<controller>' is only unplugged when no plugged SCSI
# drive passes the index check below. Always returns 1.
sub qemu_deletescsihw {
    my ($conf, $vmid, $opt) = @_;

    my $device = parse_drive($opt, $conf->{$opt});

    my $single_controller = $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
    if ($single_controller) {
        vm_deviceunplug($vmid, $conf, "virtioscsi$device->{index}");
        return 1;
    }

    my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $device);

    # keep the controller while a plugged SCSI drive is still found in its index range
    my $present = vm_devices_list($vmid);
    for my $name (keys %{$present}) {
        next if !is_valid_drivename($name);

        my $drive = parse_drive($name, $conf->{$name});
        return 1
            if $drive->{interface} eq 'scsi' && $drive->{index} < (($maxdev-1)*($controller+1));
    }

    vm_deviceunplug($vmid, $conf, "scsihw$controller");

    return 1;
}
4462
# Hot-plug the PCI bridge that $device is located on, if it needs one and that bridge is not
# present in the running VM yet.
#
# print_pci_addr() records the bridge required for $device in the hash passed as its second
# argument. Nothing is done when no bridge is recorded or its ID is below 1.
#
# Always returns 1 (unless plugging the bridge dies).
sub qemu_add_pci_bridge {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my $bridges = {};
    print_pci_addr($device, $bridges, $arch, $machine_type);

    my $bridgeid;
    while (my ($k, $v) = each %$bridges) {
        $bridgeid = $k;
    }
    return 1 if !defined($bridgeid) || $bridgeid < 1;

    my $bridge = "pci.$bridgeid";
    my $devices_list = vm_devices_list($vmid);

    if (!defined($devices_list->{$bridge})) {
        # vm_deviceplug() expects ($storecfg, $conf, $vmid, $deviceid, $device, $arch,
        # $machine_type). A bridge has no device definition, so pass undef in that slot to keep
        # $arch and $machine_type in their proper positions.
        vm_deviceplug($storecfg, $conf, $vmid, $bridge, undef, $arch, $machine_type);
    }

    return 1;
}
4486
# Set the link state of the network device $device in the running VM: up if $up is true, down
# otherwise.
sub qemu_set_link_status {
    my ($vmid, $device, $up) = @_;

    my $link_state = $up ? JSON::true : JSON::false;

    mon_cmd($vmid, "set_link", name => $device, up => $link_state);
}
4493
# Add the netdev backend for the network device $deviceid to the running VM.
#
# The option string from print_netdev_full() is split on '=' and ',' into key/value pairs that
# are handed to 'netdev_add'. 'vhost' is converted to a JSON boolean and 'queues' to a number
# first. Returns 1.
sub qemu_netdevadd {
    my ($vmid, $conf, $arch, $device, $deviceid) = @_;

    my $netdev = print_netdev_full($vmid, $conf, $arch, $device, $deviceid, 1);
    my %options = split(/[=,]/, $netdev);

    $options{vhost} = JSON::boolean(PVE::JSONSchema::parse_boolean($options{vhost}))
        if defined($options{vhost});

    # force numeric context so the value is sent as a number
    $options{queues} += 0 if defined($options{queues});

    mon_cmd($vmid, "netdev_add", %options);

    return 1;
}
4511
# Remove the netdev backend with the ID $deviceid from the running VM.
sub qemu_netdevdel {
    my ($vmid, $deviceid) = @_;

    return mon_cmd($vmid, "netdev_del", id => $deviceid);
}
4517
# Replace the USB device $deviceid in the running VM: unplug the old instance, add an xhci
# controller if the VM has none, then plug the new device. Does nothing if $device is false.
sub qemu_usb_hotplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    return if !$device;

    # get rid of the previous instance first
    vm_deviceunplug($vmid, $conf, $deviceid);

    # the new device needs an xhci controller, add one if it is missing
    my $present = vm_devices_list($vmid);
    unless ($present->{xhci}) {
        my $pciaddr = print_pci_addr("xhci", undef, $arch, $machine_type);
        qemu_deviceadd($vmid, PVE::QemuServer::USB::print_qemu_xhci_controller($pciaddr));
    }

    # finally plug the new device
    vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type);
}
4537
# Bring the number of vCPUs of the running VM $vmid to $vcpus by hot-plugging or hot-unplugging
# CPUs one at a time.
#
# $vcpus defaults to the maximum (sockets * cores) when false. Dies if more vCPUs than the
# maximum are requested, if unplugging is requested on a machine version older than 2.7, or if
# the running vCPU count does not match the configuration before plugging. On machine versions
# 2.7 and newer, $conf->{vcpus} is updated and the configuration written after every single
# successful plug/unplug.
sub qemu_cpu_hotplug {
    my ($vmid, $conf, $vcpus) = @_;

    my $machine_type = PVE::QemuServer::Machine::get_current_qemu_machine($vmid);

    # 'smp' is the old style setting (no longer used), 'sockets' takes precedence over it
    my $sockets = $conf->{sockets} || $conf->{smp} || 1;
    my $cores = $conf->{cores} || 1;
    my $maxcpus = $sockets * $cores;

    $vcpus ||= $maxcpus;

    die "you can't add more vcpus than maxcpus\n"
        if $vcpus > $maxcpus;

    my $currentvcpus = $conf->{vcpus} || $maxcpus;

    # Poll 'query-cpus-fast' once per second until exactly $expected CPUs are reported and
    # return that count. Raises a parameter exception with $errmsg once the retry counter has
    # exceeded $max_retry.
    my $wait_for_cpu_count = sub {
        my ($expected, $max_retry, $errmsg) = @_;

        my $retry = 0;
        while (1) {
            my $running = mon_cmd($vmid, "query-cpus-fast");
            return scalar(@{$running}) if scalar(@{$running}) == $expected;
            raise_param_exc({ vcpus => $errmsg }) if $retry > $max_retry;
            $retry++;
            sleep 1;
        }
    };

    if ($vcpus < $currentvcpus) {
        die "cpu hot-unplugging requires qemu version 2.7 or higher\n"
            if !PVE::QemuServer::Machine::machine_version($machine_type, 2, 7);

        # remove CPUs from the highest index downwards
        for my $i (reverse($vcpus + 1 .. $currentvcpus)) {
            qemu_devicedel($vmid, "cpu$i");

            # update conf after each successful cpu unplug
            $conf->{vcpus} = $wait_for_cpu_count->($i - 1, 5, "error unplugging cpu$i");
            PVE::QemuConfig->write_config($vmid, $conf);
        }

        return;
    }

    my $currentrunningvcpus = mon_cmd($vmid, "query-cpus-fast");
    die "vcpus in running vm does not match its configuration\n"
        if scalar(@{$currentrunningvcpus}) != $currentvcpus;

    if (PVE::QemuServer::Machine::machine_version($machine_type, 2, 7)) {
        my $arch = get_vm_arch($conf);

        for my $i ($currentvcpus + 1 .. $vcpus) {
            my $cpustr = print_cpu_device($conf, $arch, $i);
            qemu_deviceadd($vmid, $cpustr);

            # update conf after each successful cpu hotplug
            $conf->{vcpus} = $wait_for_cpu_count->($i, 10, "error hotplugging cpu$i");
            PVE::QemuConfig->write_config($vmid, $conf);
        }
    } else {
        # older machine versions use the 'cpu-add' command with zero-based IDs
        for (my $i = $currentvcpus; $i < $vcpus; $i++) {
            mon_cmd($vmid, "cpu-add", id => int($i));
        }
    }
}
4613
# Apply I/O throttling limits to the block device $deviceid of VM $vmid via
# 'block_set_io_throttle'. Every limit is truncated to an integer before it is sent.
# Does nothing if the VM is not running.
sub qemu_block_set_io_throttle {
    my ($vmid, $deviceid,
        $bps, $bps_rd, $bps_wr, $iops, $iops_rd, $iops_wr,
        $bps_max, $bps_rd_max, $bps_wr_max, $iops_max, $iops_rd_max, $iops_wr_max,
        $bps_max_length, $bps_rd_max_length, $bps_wr_max_length,
        $iops_max_length, $iops_rd_max_length, $iops_wr_max_length) = @_;

    return if !check_running($vmid) ;

    my %limit = (
        bps => $bps,
        bps_rd => $bps_rd,
        bps_wr => $bps_wr,
        iops => $iops,
        iops_rd => $iops_rd,
        iops_wr => $iops_wr,
        bps_max => $bps_max,
        bps_rd_max => $bps_rd_max,
        bps_wr_max => $bps_wr_max,
        iops_max => $iops_max,
        iops_rd_max => $iops_rd_max,
        iops_wr_max => $iops_wr_max,
        bps_max_length => $bps_max_length,
        bps_rd_max_length => $bps_rd_max_length,
        bps_wr_max_length => $bps_wr_max_length,
        iops_max_length => $iops_max_length,
        iops_rd_max_length => $iops_rd_max_length,
        iops_wr_max_length => $iops_wr_max_length,
    );

    # pass the limits in the same order as they appear in the parameter list
    my @limit_names = qw(
        bps bps_rd bps_wr iops iops_rd iops_wr
        bps_max bps_rd_max bps_wr_max iops_max iops_rd_max iops_wr_max
        bps_max_length bps_rd_max_length bps_wr_max_length
        iops_max_length iops_rd_max_length iops_wr_max_length
    );

    mon_cmd(
        $vmid,
        "block_set_io_throttle",
        device => $deviceid,
        map { ($_ => int($limit{$_})) } @limit_names,
    );
}
4645
# Resize the volume $volid to $size on the storage layer and, if the VM is running, also tell
# QEMU about the new size of the block device $deviceid.
sub qemu_block_resize {
    my ($vmid, $deviceid, $storecfg, $volid, $size) = @_;

    my $running = check_running($vmid);

    PVE::Storage::volume_resize($storecfg, $volid, $size, $running);

    return if !$running;

    # round the size passed to QEMU up to the next multiple of 1024
    $size += (1024 - $size % 1024) % 1024;

    mon_cmd($vmid, "block_resize", device => $deviceid, size => int($size), timeout => 60);
}
4666
# Create the snapshot $snap of the volume $volid.
#
# If the VM is running and do_snapshots_with_qemu() agrees, the snapshot is taken by QEMU as an
# internal snapshot of $deviceid; otherwise it is taken by the storage layer.
sub qemu_volume_snapshot {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $running = check_running($vmid);

    if (!$running || !do_snapshots_with_qemu($storecfg, $volid, $deviceid)) {
        return PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
    }

    return mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap);
}
4678
# Delete the snapshot $snap of the volume $volid.
#
# For a running VM the configuration is searched for a drive using $volid. If one is found and
# do_snapshots_with_qemu() agrees, QEMU deletes its internal snapshot; otherwise the storage
# layer deletes the snapshot and is told whether the volume is attached to the running VM.
sub qemu_volume_snapshot_delete {
    my ($vmid, $storecfg, $volid, $snap) = @_;

    my $attached_deviceid;

    if (check_running($vmid)) {
        my $conf = PVE::QemuConfig->load_config($vmid);
        my $remember_if_attached = sub {
            my ($ds, $drive) = @_;
            $attached_deviceid = "drive-$ds" if $drive->{file} eq $volid;
        };
        PVE::QemuConfig->foreach_volume($conf, $remember_if_attached);
    }

    my $use_qemu = $attached_deviceid
        && do_snapshots_with_qemu($storecfg, $volid, $attached_deviceid);

    if (!$use_qemu) {
        my $is_running = $attached_deviceid ? 1 : undef;
        return PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $is_running);
    }

    return mon_cmd(
        $vmid,
        'blockdev-snapshot-delete-internal-sync',
        device => $attached_deviceid,
        name => $snap,
    );
}
4705
# Configure the migration capabilities of the running VM.
#
# Every capability reported by 'query-migrate-capabilities' is explicitly switched on or off:
# 'auto-converge' and 'xbzrle' are enabled, 'dirty-bitmaps' is enabled if QEMU reports the
# matching PBS dirty-bitmap feature ($savevm selects the savevm instead of the migration
# variant), everything else is disabled.
sub set_migration_caps {
    my ($vmid, $savevm) = @_;

    # a failing query is tolerated, dirty bitmaps are then treated as unsupported
    my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };

    my $bitmap_prop = $savevm ? 'pbs-dirty-bitmap-savevm' : 'pbs-dirty-bitmap-migration';
    my $dirty_bitmaps = $qemu_support->{$bitmap_prop} ? 1 : 0;

    my $enabled_cap = {
        "auto-converge" => 1,
        "xbzrle" => 1,
        "x-rdma-pin-all" => 0,
        "zero-blocks" => 0,
        "compress" => 0,
        "dirty-bitmaps" => $dirty_bitmaps,
    };

    my $supported_capabilities = mon_cmd($vmid, "query-migrate-capabilities");

    my $cap_ref = [];
    for my $supported (@$supported_capabilities) {
        my $name = $supported->{capability};
        my $state = $enabled_cap->{$name} ? JSON::true : JSON::false;
        push @$cap_ref, { capability => $name, state => $state };
    }

    mon_cmd($vmid, "migrate-set-capabilities", capabilities => $cap_ref);
}
4736
# Call $func->($volid, $attributes, @param) once for every distinct volume ID referenced by the
# current configuration, its pending section or any of its snapshots (including 'vmstate' and
# unused volumes).
#
# $attributes is a hash accumulated over all references of the volume:
#   cdrom                  - 1 only if every reference is a CD-ROM drive
#   replicate              - 1 if any reference has replication enabled (the default)
#   shared                 - 1 if any reference is marked shared
#   is_unused              - 1 if referenced by an unusedN key
#   is_attached            - 1 if referenced by the current config and not flagged as unused
#   referenced_in_snapshot - hash of snapshot names referencing the volume
#   referenced_in_pending  - 1 if referenced in the pending section
#   size                   - first true size seen
#   is_vmstate             - 1 if referenced by the 'vmstate' key
#   is_tpmstate            - 1 if referenced by the 'tpmstate0' key
#   drivename              - key of the last reference that is a valid drive name
sub foreach_volid {
    my ($conf, $func, @param) = @_;

    my $volhash = {};

    # initialise a flag to 0 on first sight and raise it to 1 once the condition holds
    my $raise_flag = sub {
        my ($info, $name, $condition) = @_;
        $info->{$name} //= 0;
        $info->{$name} = 1 if $condition;
    };

    my $test_volid = sub {
        my ($key, $drive, $snapname, $pending) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $info = ($volhash->{$volid} //= {});

        # 'cdrom' works the other way round: it starts at 1 and drops to 0
        $info->{cdrom} //= 1;
        $info->{cdrom} = 0 if !drive_is_cdrom($drive);

        $raise_flag->($info, 'replicate', $drive->{replicate} // 1);
        $raise_flag->($info, 'shared', $drive->{shared});
        $raise_flag->($info, 'is_unused', $key =~ /^unused\d+$/);
        $raise_flag->(
            $info, 'is_attached', !$info->{is_unused} && !defined($snapname) && !$pending);

        $info->{referenced_in_snapshot}->{$snapname} = 1 if defined($snapname);
        $info->{referenced_in_pending} = 1 if $pending;

        my $size = $drive->{size};
        $info->{size} //= $size if $size;

        $raise_flag->($info, 'is_vmstate', $key eq 'vmstate');
        $raise_flag->($info, 'is_tpmstate', $key eq 'tpmstate0');

        $info->{drivename} = $key if is_valid_drivename($key);
    };

    my $include_opts = {
        extra_keys => ['vmstate'],
        include_unused => 1,
    };

    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $test_volid);

    if (defined($conf->{pending}) && %{$conf->{pending}}) {
        PVE::QemuConfig->foreach_volume_full(
            $conf->{pending}, $include_opts, $test_volid, undef, 1);
    }

    for my $snapname (keys %{$conf->{snapshots}}) {
        my $snap = $conf->{snapshots}->{$snapname};
        PVE::QemuConfig->foreach_volume_full($snap, $include_opts, $test_volid, $snapname);
    }

    for my $volid (keys %$volhash) {
        $func->($volid, $volhash->{$volid}, @param);
    }
}
4801
# Options that vmconfig_hotplug_pending() moves straight from the pending section into the
# active configuration. All cloud-init options are included as well.
my $fast_plug_option = {
    map { ($_ => 1) } qw(
        description
        hookscript
        lock
        migrate_downtime
        migrate_speed
        name
        onboot
        protection
        shares
        startup
        tags
        vmstatestorage
    )
};

$fast_plug_option->{$_} = 1 for keys %$confdesc_cloudinit;
4820
4821 # hotplug changes in [PENDING]
4822 # $selection hash can be used to only apply specified options, for
4823 # example: { cores => 1 } (only apply changed 'cores')
4824 # $errors ref is used to return error messages
4825 sub vmconfig_hotplug_pending {
4826 my ($vmid, $conf, $storecfg, $selection, $errors) = @_;
4827
4828 my $defaults = load_defaults();
4829 my $arch = get_vm_arch($conf);
4830 my $machine_type = get_vm_machine($conf, undef, $arch);
4831
4832 # commit values which do not have any impact on running VM first
4833 # Note: those option cannot raise errors, we we do not care about
4834 # $selection and always apply them.
4835
4836 my $add_error = sub {
4837 my ($opt, $msg) = @_;
4838 $errors->{$opt} = "hotplug problem - $msg";
4839 };
4840
4841 my $cloudinit_pending_properties = PVE::QemuServer::cloudinit_pending_properties();
4842
4843 my $cloudinit_record_changed = sub {
4844 my ($conf, $opt, $old, $new) = @_;
4845 return if !$cloudinit_pending_properties->{$opt};
4846
4847 my $ci = ($conf->{cloudinit} //= {});
4848
4849 my $recorded = $ci->{$opt};
4850 my %added = map { $_ => 1 } PVE::Tools::split_list(delete($ci->{added}) // '');
4851
4852 if (defined($new)) {
4853 if (defined($old)) {
4854 # an existing value is being modified
4855 if (defined($recorded)) {
4856 # the value was already not in sync
4857 if ($new eq $recorded) {
4858 # a value is being reverted to the cloud-init state:
4859 delete $ci->{$opt};
4860 delete $added{$opt};
4861 } else {
4862 # the value was changed multiple times, do nothing
4863 }
4864 } elsif ($added{$opt}) {
4865 # the value had been marked as added and is being changed, do nothing
4866 } else {
4867 # the value is new, record it:
4868 $ci->{$opt} = $old;
4869 }
4870 } else {
4871 # a new value is being added
4872 if (defined($recorded)) {
4873 # it was already not in sync
4874 if ($new eq $recorded) {
4875 # a value is being reverted to the cloud-init state:
4876 delete $ci->{$opt};
4877 delete $added{$opt};
4878 } else {
4879 # the value had temporarily been removed, do nothing
4880 }
4881 } elsif ($added{$opt}) {
4882 # the value had been marked as added already, do nothing
4883 } else {
4884 # the value is new, add it
4885 $added{$opt} = 1;
4886 }
4887 }
4888 } elsif (!defined($old)) {
4889 # a non-existent value is being removed? ignore...
4890 } else {
4891 # a value is being deleted
4892 if (defined($recorded)) {
4893 # a value was already recorded, just keep it
4894 } elsif ($added{$opt}) {
4895 # the value was marked as added, remove it
4896 delete $added{$opt};
4897 } else {
4898 # a previously unrecorded value is being removed, record the old value:
4899 $ci->{$opt} = $old;
4900 }
4901 }
4902
4903 my $added = join(',', sort keys %added);
4904 $ci->{added} = $added if length($added);
4905 };
4906
4907 my $changes = 0;
4908 foreach my $opt (keys %{$conf->{pending}}) { # add/change
4909 if ($fast_plug_option->{$opt}) {
4910 my $new = delete $conf->{pending}->{$opt};
4911 $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $new);
4912 $conf->{$opt} = $new;
4913 $changes = 1;
4914 }
4915 }
4916
4917 if ($changes) {
4918 PVE::QemuConfig->write_config($vmid, $conf);
4919 }
4920
4921 my $ostype = $conf->{ostype};
4922 my $version = extract_version($machine_type, get_running_qemu_version($vmid));
4923 my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
4924 my $usb_hotplug = $hotplug_features->{usb}
4925 && min_version($version, 7, 1)
4926 && defined($ostype) && ($ostype eq 'l26' || windows_version($ostype) > 7);
4927
4928 my $cgroup = PVE::QemuServer::CGroup->new($vmid);
4929 my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
4930
4931 foreach my $opt (sort keys %$pending_delete_hash) {
4932 next if $selection && !$selection->{$opt};
4933 my $force = $pending_delete_hash->{$opt}->{force};
4934 eval {
4935 if ($opt eq 'hotplug') {
4936 die "skip\n" if ($conf->{hotplug} =~ /(cpu|memory)/);
4937 } elsif ($opt eq 'tablet') {
4938 die "skip\n" if !$hotplug_features->{usb};
4939 if ($defaults->{tablet}) {
4940 vm_deviceplug($storecfg, $conf, $vmid, 'tablet', $arch, $machine_type);
4941 vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', $arch, $machine_type)
4942 if $arch eq 'aarch64';
4943 } else {
4944 vm_deviceunplug($vmid, $conf, 'tablet');
4945 vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
4946 }
4947 } elsif ($opt =~ m/^usb(\d+)$/) {
4948 my $index = $1;
4949 die "skip\n" if !$usb_hotplug;
4950 vm_deviceunplug($vmid, $conf, "usbredirdev$index"); # if it's a spice port
4951 vm_deviceunplug($vmid, $conf, $opt);
4952 } elsif ($opt eq 'vcpus') {
4953 die "skip\n" if !$hotplug_features->{cpu};
4954 qemu_cpu_hotplug($vmid, $conf, undef);
4955 } elsif ($opt eq 'balloon') {
4956 # enable balloon device is not hotpluggable
4957 die "skip\n" if defined($conf->{balloon}) && $conf->{balloon} == 0;
4958 # here we reset the ballooning value to memory
4959 my $balloon = get_current_memory($conf->{memory});
4960 mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
4961 } elsif ($fast_plug_option->{$opt}) {
4962 # do nothing
4963 } elsif ($opt =~ m/^net(\d+)$/) {
4964 die "skip\n" if !$hotplug_features->{network};
4965 vm_deviceunplug($vmid, $conf, $opt);
4966 if($have_sdn) {
4967 my $net = PVE::QemuServer::parse_net($conf->{$opt});
4968 PVE::Network::SDN::Vnets::del_ips_from_mac($net->{bridge}, $net->{macaddr}, $conf->{name});
4969 }
4970 } elsif (is_valid_drivename($opt)) {
4971 die "skip\n" if !$hotplug_features->{disk} || $opt =~ m/(ide|sata)(\d+)/;
4972 vm_deviceunplug($vmid, $conf, $opt);
4973 vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
4974 } elsif ($opt =~ m/^memory$/) {
4975 die "skip\n" if !$hotplug_features->{memory};
4976 PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf);
4977 } elsif ($opt eq 'cpuunits') {
4978 $cgroup->change_cpu_shares(undef);
4979 } elsif ($opt eq 'cpulimit') {
4980 $cgroup->change_cpu_quota(undef, undef); # reset, cgroup module can better decide values
4981 } else {
4982 die "skip\n";
4983 }
4984 };
4985 if (my $err = $@) {
4986 &$add_error($opt, $err) if $err ne "skip\n";
4987 } else {
4988 my $old = delete $conf->{$opt};
4989 $cloudinit_record_changed->($conf, $opt, $old, undef);
4990 PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
4991 }
4992 }
4993
4994 my $cloudinit_opt;
4995 foreach my $opt (keys %{$conf->{pending}}) {
4996 next if $selection && !$selection->{$opt};
4997 my $value = $conf->{pending}->{$opt};
4998 eval {
4999 if ($opt eq 'hotplug') {
5000 die "skip\n" if ($value =~ /memory/) || ($value !~ /memory/ && $conf->{hotplug} =~ /memory/);
5001 die "skip\n" if ($value =~ /cpu/) || ($value !~ /cpu/ && $conf->{hotplug} =~ /cpu/);
5002 } elsif ($opt eq 'tablet') {
5003 die "skip\n" if !$hotplug_features->{usb};
5004 if ($value == 1) {
5005 vm_deviceplug($storecfg, $conf, $vmid, 'tablet', $arch, $machine_type);
5006 vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', $arch, $machine_type)
5007 if $arch eq 'aarch64';
5008 } elsif ($value == 0) {
5009 vm_deviceunplug($vmid, $conf, 'tablet');
5010 vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
5011 }
5012 } elsif ($opt =~ m/^usb(\d+)$/) {
5013 my $index = $1;
5014 die "skip\n" if !$usb_hotplug;
5015 my $d = eval { parse_property_string('pve-qm-usb', $value) };
5016 my $id = $opt;
5017 if ($d->{host} =~ m/^spice$/i) {
5018 $id = "usbredirdev$index";
5019 }
5020 qemu_usb_hotplug($storecfg, $conf, $vmid, $id, $d, $arch, $machine_type);
5021 } elsif ($opt eq 'vcpus') {
5022 die "skip\n" if !$hotplug_features->{cpu};
5023 qemu_cpu_hotplug($vmid, $conf, $value);
5024 } elsif ($opt eq 'balloon') {
5025 # enabling/disabling the ballooning device is not hotpluggable
5026 my $old_balloon_enabled = !!(!defined($conf->{balloon}) || $conf->{balloon});
5027 my $new_balloon_enabled = !!(!defined($conf->{pending}->{balloon}) || $conf->{pending}->{balloon});
5028 die "skip\n" if $old_balloon_enabled != $new_balloon_enabled;
5029
5030 # allow manual ballooning if shares is set to zero
5031 if ((defined($conf->{shares}) && ($conf->{shares} == 0))) {
5032 my $memory = get_current_memory($conf->{memory});
5033 my $balloon = $conf->{pending}->{balloon} || $memory;
5034 mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
5035 }
5036 } elsif ($opt =~ m/^net(\d+)$/) {
5037 # some changes can be done without hotplug
5038 vmconfig_update_net($storecfg, $conf, $hotplug_features->{network},
5039 $vmid, $opt, $value, $arch, $machine_type);
5040 } elsif (is_valid_drivename($opt)) {
5041 die "skip\n" if $opt eq 'efidisk0' || $opt eq 'tpmstate0';
5042 # some changes can be done without hotplug
5043 my $drive = parse_drive($opt, $value);
5044 if (drive_is_cloudinit($drive)) {
5045 $cloudinit_opt = [$opt, $drive];
5046 # apply all the other changes first, then generate the cloudinit disk
5047 die "skip\n";
5048 }
5049 vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
5050 $vmid, $opt, $value, $arch, $machine_type);
5051 } elsif ($opt =~ m/^memory$/) { #dimms
5052 die "skip\n" if !$hotplug_features->{memory};
5053 $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $value);
5054 } elsif ($opt eq 'cpuunits') {
5055 my $new_cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{pending}->{$opt}); #clamp
5056 $cgroup->change_cpu_shares($new_cpuunits);
5057 } elsif ($opt eq 'cpulimit') {
5058 my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000);
5059 $cgroup->change_cpu_quota($cpulimit, 100000);
5060 } elsif ($opt eq 'agent') {
5061 vmconfig_update_agent($conf, $opt, $value);
5062 } else {
5063 die "skip\n"; # skip non-hot-pluggable options
5064 }
5065 };
5066 if (my $err = $@) {
5067 &$add_error($opt, $err) if $err ne "skip\n";
5068 } else {
5069 $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $value);
5070 $conf->{$opt} = $value;
5071 delete $conf->{pending}->{$opt};
5072 }
5073 }
5074
5075 if (defined($cloudinit_opt)) {
5076 my ($opt, $drive) = @$cloudinit_opt;
5077 my $value = $conf->{pending}->{$opt};
5078 eval {
5079 my $temp = {%$conf, $opt => $value};
5080 PVE::QemuServer::Cloudinit::apply_cloudinit_config($temp, $vmid);
5081 vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
5082 $vmid, $opt, $value, $arch, $machine_type);
5083 };
5084 if (my $err = $@) {
5085 &$add_error($opt, $err) if $err ne "skip\n";
5086 } else {
5087 $conf->{$opt} = $value;
5088 delete $conf->{pending}->{$opt};
5089 }
5090 }
5091
5092 # unplug xhci controller if no usb device is left
5093 if ($usb_hotplug) {
5094 my $has_usb = 0;
5095 for (my $i = 0; $i < $PVE::QemuServer::USB::MAX_USB_DEVICES; $i++) {
5096 next if !defined($conf->{"usb$i"});
5097 $has_usb = 1;
5098 last;
5099 }
5100 if (!$has_usb) {
5101 vm_deviceunplug($vmid, $conf, 'xhci');
5102 }
5103 }
5104
5105 PVE::QemuConfig->write_config($vmid, $conf);
5106
5107 if ($hotplug_features->{cloudinit} && PVE::QemuServer::Cloudinit::has_changes($conf)) {
5108 PVE::QemuServer::vmconfig_update_cloudinit_drive($storecfg, $conf, $vmid);
5109 }
5110 }
5111
# Free the volume referenced by a drive entry that is being removed.
#
# Only acts when $force is set or $key names an 'unused' entry, and only if
# drive_is_cdrom($drive, 1) is false for the drive.
#
# Returns 1 when the caller may drop the entry (the volume was freed, or it is
# not owned by this VM) and nothing otherwise. Dies when the volume is still
# referenced elsewhere; the permission check is presumably fatal on failure
# too - confirm against PVE::RPCEnvironment.
sub try_deallocate_drive {
    my ($storecfg, $vmid, $conf, $key, $drive, $rpcenv, $authuser, $force) = @_;

    return if !($force || $key =~ /^unused/);
    return if drive_is_cdrom($drive, 1);

    my $volid = $drive->{file};

    # If vm is not owner of this disk remove from config
    return 1 if !vm_is_volid_owner($storecfg, $vmid, $volid);

    my $sid = PVE::Storage::parse_volume_id($volid);
    $rpcenv->check($authuser, "/storage/$sid", ['Datastore.AllocateSpace']);

    # check if the disk is really unused
    my $still_referenced = PVE::QemuServer::Drive::is_volume_in_use($storecfg, $conf, $key, $volid);
    die "unable to delete '$volid' - volume is still in use (snapshot?)\n" if $still_referenced;

    PVE::Storage::vdisk_free($storecfg, $volid);
    return 1;
}
5134
# Handle removal of the drive option $opt from $conf.
#
# Without $force the drive is only detached: it is handed to
# vmconfig_register_unused_drive(). With $force the current user must hold
# 'VM.Config.Disk' on the VM and the volume is passed to
# try_deallocate_drive().
sub vmconfig_delete_or_detach_drive {
    my ($vmid, $storecfg, $conf, $opt, $force) = @_;

    my $drive = parse_drive($opt, $conf->{$opt});

    # looked up unconditionally, exactly like before, even for a plain detach
    my $rpcenv = PVE::RPCEnvironment::get();
    my $authuser = $rpcenv->get_user();

    return vmconfig_register_unused_drive($storecfg, $vmid, $conf, $drive) if !$force;

    $rpcenv->check_vm_perm($authuser, $vmid, undef, ['VM.Config.Disk']);
    return try_deallocate_drive($storecfg, $vmid, $conf, $opt, $drive, $rpcenv, $authuser, $force);
}
5150
5151
5152
# Apply all pending changes of a VM config without hot-plugging ("cold plug").
#
# Parameters:
#   $vmid            - VM ID, used for writing the config and for helper calls
#   $conf            - VM config hash; modified in place
#   $storecfg        - storage configuration, passed through to the drive helpers
#   $errors          - optional hash ref; gets one message per option that failed
#   $skip_cloud_init - if true, the cloud-init data is never regenerated here
#
# Steps: process the pending deletions, clean up the pending section, move the
# remaining pending values into the active config, write the config once, and
# finally regenerate the cloud-init data if a cloud-init drive was among the
# applied changes. Returns early (nothing) when there is no pending section
# content. Failures of single options are recorded and warned about, they do
# not abort the remaining options.
sub vmconfig_apply_pending {
    my ($vmid, $conf, $storecfg, $errors, $skip_cloud_init) = @_;

    return if !scalar(keys %{$conf->{pending}});

    # record a failure for $opt in $errors and emit it as a warning as well
    my $add_apply_error = sub {
        my ($opt, $msg) = @_;
        my $err_msg = "unable to apply pending change $opt : $msg";
        $errors->{$opt} = $err_msg;
        warn $err_msg;
    };

    # cold plug

    my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    foreach my $opt (sort keys %$pending_delete_hash) {
        my $force = $pending_delete_hash->{$opt}->{force};
        eval {
            if ($opt =~ m/^unused/) {
                # 'unused' entries are not expected in the pending-delete list
                die "internal error";
            } elsif (defined($conf->{$opt}) && is_valid_drivename($opt)) {
                vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
            } elsif (defined($conf->{$opt}) && $opt =~ m/^net\d+$/) {
                # $have_sdn is defined outside this sub - presumably set when the SDN modules
                # could be loaded (TODO confirm)
                if($have_sdn) {
                    my $net = PVE::QemuServer::parse_net($conf->{$opt});
                    # a failing IP cleanup is only warned about, the option is still removed below
                    eval { PVE::Network::SDN::Vnets::del_ips_from_mac($net->{bridge}, $net->{macaddr}, $conf->{name}) };
                    # bare warn: reports the error left in $@ by the eval above
                    warn if $@;
                }
            }
        };
        if (my $err = $@) {
            $add_apply_error->($opt, $err);
        } else {
            PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
            delete $conf->{$opt};
        }
    }

    PVE::QemuConfig->cleanup_pending($conf);

    # 0 = never generate (caller asked to skip), undef = not yet decided, 1 = generate
    my $generate_cloudinit = $skip_cloud_init ? 0 : undef;

    foreach my $opt (keys %{$conf->{pending}}) { # add/change
        next if $opt eq 'delete'; # just to be sure
        eval {
            if (defined($conf->{$opt}) && is_valid_drivename($opt)) {
                # the drive currently configured under this key gets replaced, register it as unused
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, parse_drive($opt, $conf->{$opt}))
            } elsif (defined($conf->{pending}->{$opt}) && $opt =~ m/^net\d+$/) {
                return if !$have_sdn; # return from eval if SDN is not available

                my $new_net = PVE::QemuServer::parse_net($conf->{pending}->{$opt});
                if ($conf->{$opt}) {
                    my $old_net = PVE::QemuServer::parse_net($conf->{$opt});

                    # drop the old IP registration only if bridge or MAC address actually change
                    if (defined($old_net->{bridge}) && defined($old_net->{macaddr}) && (
                        safe_string_ne($old_net->{bridge}, $new_net->{bridge}) ||
                        safe_string_ne($old_net->{macaddr}, $new_net->{macaddr})
                    )) {
                        PVE::Network::SDN::Vnets::del_ips_from_mac($old_net->{bridge}, $old_net->{macaddr}, $conf->{name});
                    }
                }
                #fixme: reuse ip if mac change && same bridge
                PVE::Network::SDN::Vnets::add_next_free_cidr($new_net->{bridge}, $conf->{name}, $new_net->{macaddr}, $vmid, undef, 1);
            }
        };
        if (my $err = $@) {
            $add_apply_error->($opt, $err);
        } else {

            if (is_valid_drivename($opt)) {
                my $drive = parse_drive($opt, $conf->{pending}->{$opt});
                # '//=' keeps an explicit 0 from $skip_cloud_init untouched
                $generate_cloudinit //= 1 if drive_is_cloudinit($drive);
            }

            # move the value from the pending section into the active config
            $conf->{$opt} = delete $conf->{pending}->{$opt};
        }
    }

    # write all changes at once to avoid unnecessary i/o
    PVE::QemuConfig->write_config($vmid, $conf);
    if ($generate_cloudinit) {
        if (PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid)) {
            # After successful generation and if there were changes to be applied, update the
            # config to drop the {cloudinit} entry.
            PVE::QemuConfig->write_config($vmid, $conf);
        }
    }
}
5241
# Apply a changed or new network device option ($opt, e.g. "net0") to a VM.
#
# Parameters:
#   $storecfg, $arch, $machine_type - passed through to vm_deviceplug()
#   $conf     - current VM config; $conf->{$opt} is the currently active value
#   $hotplug  - true if network hotplug is allowed
#   $vmid     - VM ID
#   $opt      - option name of the network device
#   $value    - new property string for the device
#
# Behaviour:
#   - existing device, only bridge/tag/trunks/firewall/rate/link_down differ:
#     the change is applied to the tap interface resp. the link state and 1 is
#     returned
#   - existing device, model/macaddr/queues/mtu differ or one side has no
#     bridge: the device is unplugged and then plugged again with the new
#     settings (requires $hotplug)
#   - no existing device: the new device is plugged (requires $hotplug)
# Dies with "skip\n" whenever the change would need hotplug but $hotplug is
# false.
sub vmconfig_update_net {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $newnet = parse_net($value);

    if ($conf->{$opt}) {
        my $oldnet = parse_net($conf->{$opt});

        if (safe_string_ne($oldnet->{model}, $newnet->{model}) ||
            safe_string_ne($oldnet->{macaddr}, $newnet->{macaddr}) ||
            safe_num_ne($oldnet->{queues}, $newnet->{queues}) ||
            safe_num_ne($oldnet->{mtu}, $newnet->{mtu}) ||
            !($newnet->{bridge} && $oldnet->{bridge})
        ) { # bridge/nat mode change

            # for non online change, we try to hot-unplug
            die "skip\n" if !$hotplug;
            vm_deviceunplug($vmid, $conf, $opt);

            # $have_sdn is defined outside this sub
            if ($have_sdn) {
                PVE::Network::SDN::Vnets::del_ips_from_mac($oldnet->{bridge}, $oldnet->{macaddr}, $conf->{name});
            }

            # no return here: execution continues with the hotplug of the new device at the end

        } else {

            # the capture group provides the device index for the tap interface name
            die "internal error" if $opt !~ m/net(\d+)/;
            my $iface = "tap${vmid}i$1";

            if (safe_string_ne($oldnet->{bridge}, $newnet->{bridge}) ||
                safe_num_ne($oldnet->{tag}, $newnet->{tag}) ||
                safe_string_ne($oldnet->{trunks}, $newnet->{trunks}) ||
                safe_num_ne($oldnet->{firewall}, $newnet->{firewall})
            ) {
                PVE::Network::tap_unplug($iface);

                #set link_down in guest if bridge or vlan change to notify guest (dhcp renew for example)
                if (safe_string_ne($oldnet->{bridge}, $newnet->{bridge}) ||
                    safe_num_ne($oldnet->{tag}, $newnet->{tag})
                ) {
                    qemu_set_link_status($vmid, $opt, 0);
                }

                # a bridge change moves the IP registration from the old to the new bridge
                if (safe_string_ne($oldnet->{bridge}, $newnet->{bridge})) {
                    if ($have_sdn) {
                        PVE::Network::SDN::Vnets::del_ips_from_mac($oldnet->{bridge}, $oldnet->{macaddr}, $conf->{name});
                        PVE::Network::SDN::Vnets::add_next_free_cidr($newnet->{bridge}, $conf->{name}, $newnet->{macaddr}, $vmid, undef, 1);
                    }
                }

                # re-plug the tap device with the new settings, including the rate limit
                if ($have_sdn) {
                    PVE::Network::SDN::Zones::tap_plug($iface, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks}, $newnet->{rate});
                } else {
                    PVE::Network::tap_plug($iface, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks}, $newnet->{rate});
                }

                #set link_up in guest if bridge or vlan change to notify guest (dhcp renew for example)
                if (safe_string_ne($oldnet->{bridge}, $newnet->{bridge}) ||
                    safe_num_ne($oldnet->{tag}, $newnet->{tag})
                ) {
                    qemu_set_link_status($vmid, $opt, 1);
                }

            } elsif (safe_num_ne($oldnet->{rate}, $newnet->{rate})) {
                # Rate can be applied on its own but any change above needs to
                # include the rate in tap_plug since OVS resets everything.
                PVE::Network::tap_rate_limit($iface, $newnet->{rate});
            }

            # an explicit link_down change is applied last, so it wins over the link-up above
            if (safe_string_ne($oldnet->{link_down}, $newnet->{link_down})) {
                qemu_set_link_status($vmid, $opt, !$newnet->{link_down});
            }

            return 1;
        }
    }

    # reached for new devices and after the hot-unplug of a device that needs re-creation
    if ($hotplug) {
        if ($have_sdn) {
            PVE::Network::SDN::Vnets::add_next_free_cidr($newnet->{bridge}, $conf->{name}, $newnet->{macaddr}, $vmid, undef, 1);
            PVE::Network::SDN::Vnets::add_dhcp_mapping($newnet->{bridge}, $newnet->{macaddr}, $vmid, $conf->{name});
        }
        vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet, $arch, $machine_type);
    } else {
        die "skip\n";
    }
}
5328
# Decide whether a pending change of the guest agent option can be applied to
# a running VM.
#
# Dies with "skip\n" if the option is not currently set, or if any sub-option
# other than the ones listed in $hotplug_options differs between the current
# and the new value. Returns nothing when the change may be applied as is.
sub vmconfig_update_agent {
    my ($conf, $opt, $value) = @_;

    die "skip\n" if !$conf->{$opt};

    my $hotplug_options = { fstrim_cloned_disks => 1 };

    my $old_agent = parse_guest_agent($conf);
    my $agent = parse_guest_agent({$opt => $value});

    # true if a non-hotpluggable key of $from has a different value in $to
    my $differs = sub {
        my ($from, $to) = @_;
        for my $name (keys %$from) {
            next if defined($hotplug_options->{$name});
            return 1 if safe_string_ne($from->{$name}, $to->{$name});
        }
        return 0;
    };

    die "skip\n" if $differs->($agent, $old_agent); # added/changed options
    die "skip\n" if $differs->($old_agent, $agent); # removed options

    return; # either no actual change (e.g., format string reordered) or just hotpluggable changes
}
5351
# Apply a changed or new drive option ($opt) to a running VM.
#
# - existing CD-ROM drive: the medium is ejected and, unless the new file is
#   'none', replaced by the new one; returns 1
# - existing disk, same volume: dies with "skip\n" if a property that cannot
#   be changed online differs, otherwise updates the I/O throttle settings if
#   any of them changed; returns 1
# - existing disk, different volume: the old disk is unplugged and registered
#   as unused, then the new one is plugged (requires $hotplug)
# - no existing drive: the new disk is plugged (requires $hotplug)
#
# Dies with "unable to change media type\n" when the media type differs and
# with "skip\n" when hotplug would be needed but is disabled, or when $opt
# matches ide/sata.
sub vmconfig_update_disk {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $drive = parse_drive($opt, $value);

    my $old_drive = $conf->{$opt} ? parse_drive($opt, $conf->{$opt}) : undef;

    if ($old_drive) {
        my $media = $drive->{media} || 'disk';
        my $oldmedia = $old_drive->{media} || 'disk';
        die "unable to change media type\n" if $media ne $oldmedia;

        if (drive_is_cdrom($old_drive)) { # cdrom
            if ($drive->{file} eq 'none') {
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive)
                    if drive_is_cloudinit($old_drive);
            } else {
                my $path = get_iso_path($storecfg, $vmid, $drive->{file});

                # force eject if locked
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");

                mon_cmd($vmid, "blockdev-change-medium", id => "$opt", filename => "$path")
                    if $path;
            }

            return 1;
        }

        if ($drive->{file} ne $old_drive->{file}) {
            die "skip\n" if !$hotplug;

            # unplug and register as unused, the new volume gets plugged at the end
            vm_deviceunplug($vmid, $conf, $opt);
            vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
        } else {
            # update existing disk

            # skip non hotpluggable value
            for my $prop (qw(aio discard iothread queues product cache ssd vendor ro)) {
                die "skip\n" if safe_string_ne($drive->{$prop}, $old_drive->{$prop});
            }

            # throttle settings, listed in the argument order of qemu_block_set_io_throttle()
            my @throttle_props = qw(
                mbps mbps_rd mbps_wr
                iops iops_rd iops_wr
                mbps_max mbps_rd_max mbps_wr_max
                iops_max iops_rd_max iops_wr_max
                bps_max_length bps_rd_max_length bps_wr_max_length
                iops_max_length iops_rd_max_length iops_wr_max_length
            );

            my $throttle_changed = 0;
            for my $prop (@throttle_props) {
                next if !safe_num_ne($drive->{$prop}, $old_drive->{$prop});
                $throttle_changed = 1;
                last;
            }

            # apply throttle
            if ($throttle_changed) {
                # the mbps values are configured in MiB/s and passed on in bytes/s
                my @rate = map { ($drive->{$_} || 0) * 1024 * 1024 } qw(mbps mbps_rd mbps_wr);
                my @iops = map { ($drive->{$_} || 0) } qw(iops iops_rd iops_wr);
                my @rate_max = map { ($drive->{$_} || 0) * 1024 * 1024 }
                    qw(mbps_max mbps_rd_max mbps_wr_max);
                my @iops_max = map { ($drive->{$_} || 0) } qw(iops_max iops_rd_max iops_wr_max);
                # unset burst lengths default to 1
                my @burst_length = map { ($drive->{$_} || 1) } qw(
                    bps_max_length bps_rd_max_length bps_wr_max_length
                    iops_max_length iops_rd_max_length iops_wr_max_length
                );

                qemu_block_set_io_throttle(
                    $vmid, "drive-$opt", @rate, @iops, @rate_max, @iops_max, @burst_length);
            }

            return 1;
        }
    }

    die "skip\n" if !$hotplug || $opt =~ m/(ide|sata)(\d+)/;
    # hotplug new disks
    PVE::Storage::activate_volumes($storecfg, [$drive->{file}]) if $drive->{file} !~ m|^/dev/.+|;
    vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive, $arch, $machine_type);
}
5463
# Regenerate the cloud-init data of a VM and, if the VM is running, swap the
# medium of its cloud-init drive.
#
# Does nothing when the config contains no cloud-init drive. If several
# volumes qualify, the last one visited by foreach_volume is used. The config
# is written back when apply_cloudinit_config() returns a true value.
sub vmconfig_update_cloudinit_drive {
    my ($storecfg, $conf, $vmid) = @_;

    my ($ci_key, $ci_drive);

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($key, $candidate) = @_;
        return if !PVE::QemuServer::drive_is_cloudinit($candidate);
        ($ci_key, $ci_drive) = ($key, $candidate);
    });

    return if !$ci_drive;

    PVE::QemuConfig->write_config($vmid, $conf)
        if PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid);

    if (PVE::QemuServer::check_running($vmid)) {
        my $path = PVE::Storage::path($storecfg, $ci_drive->{file});
        if ($path) {
            # eject first, then insert the current image
            mon_cmd($vmid, "eject", force => JSON::true, id => "$ci_key");
            mon_cmd($vmid, "blockdev-change-medium", id => "$ci_key", filename => "$path");
        }
    }
}
5494
# called in locked context by incoming migration
#
# Collects the volumes of $conf that live on non-shared storage. CD-ROM
# drives, 'tpmstate0' and drives without a file are left out.
#
# Returns a hash ref mapping the drive key to
#   [$volid, $storeid, $drive, $use_existing, $format]
# where $use_existing is 1 for volumes listed in $replicated_volumes.
sub vm_migrate_get_nbd_disks {
    my ($storecfg, $conf, $replicated_volumes) = @_;

    my %local_volumes;

    my $collect = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);
        return if $ds eq 'tpmstate0';

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid);
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        return if $scfg->{shared};

        my $format = qemu_img_format($scfg, $volname);

        # replicated disks re-use existing state via bitmap
        my $use_existing = $replicated_volumes->{$volid} ? 1 : 0;

        $local_volumes{$ds} = [$volid, $storeid, $drive, $use_existing, $format];
    };

    PVE::QemuConfig->foreach_volume($conf, $collect);

    return \%local_volumes;
}
5523
# called in locked context by incoming migration
#
# Prepares the target volumes for the entries of $source_volumes, each being
# [$volid, $storeid, $drive, $use_existing, $format]. Entries flagged with
# $use_existing keep their volume; for all others a new volume is allocated
# on the storage that $storagemap maps the source storage to.
#
# Returns a hash ref keyed by drive key with 'drivestr' and 'volid', plus
# 'replicated' => 1 for re-used volumes.
#
# Note: for newly allocated volumes the passed-in $drive hash itself is
# updated (format and file).
sub vm_migrate_alloc_nbd_disks {
    my ($storecfg, $vmid, $source_volumes, $storagemap) = @_;

    my %nbd;

    for my $opt (sort keys %$source_volumes) {
        my ($volid, $storeid, $drive, $use_existing, $format) = @{$source_volumes->{$opt}};

        if ($use_existing) {
            my $drivestr = print_drive($drive);
            $nbd{$opt} = {
                drivestr => $drivestr,
                volid => $volid,
                replicated => 1,
            };
            next;
        }

        my $target_storeid = PVE::JSONSchema::map_id($storagemap, $storeid);

        # order of precedence, filtered by whether storage supports it:
        # 1. explicit requested format
        # 2. default format of storage
        my ($defFormat, $validFormats) =
            PVE::Storage::storage_default_format($storecfg, $target_storeid);
        my $format_supported = $format && grep { $format eq $_ } $validFormats->@*;
        $format = $defFormat if !$format_supported;

        my $size = $drive->{size} / 1024;
        my $newvolid =
            PVE::Storage::vdisk_alloc($storecfg, $target_storeid, $vmid, $format, undef, $size);

        # not a copy: this writes into the drive hash from $source_volumes
        $drive->{format} = $format;
        $drive->{file} = $newvolid;

        my $drivestr = print_drive($drive);
        $nbd{$opt} = {
            drivestr => $drivestr,
            volid => $newvolid,
        };
    }

    return \%nbd;
}
5559
5560 # see vm_start_nolock for parameters, additionally:
5561 # migrate_opts:
5562 # storagemap = parsed storage map for allocating NBD disks
# Start a VM while holding its config lock.
#
# Refuses templates (unless $params->{skiptemplate}) and VMs that are already
# running. If the config carries a 'backup' lock and the VM is running, the
# existing instance is resumed via vm_resume() instead. Sets
# $params->{resume} when the config has a 'suspended' lock or a vmstate. When
# $migrate_opts->{storagemap} is given, the NBD target disks are allocated
# first and put into the config used for the start.
#
# Returns whatever vm_resume() resp. vm_start_nolock() return.
sub vm_start {
    my ($storecfg, $vmid, $params, $migrate_opts) = @_;

    my $start_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid, $migrate_opts->{migratedfrom});

        if (!$params->{skiptemplate} && PVE::QemuConfig->is_template($conf)) {
            die "you can't start a vm if it's a template\n";
        }

        my $has_suspended_lock = PVE::QemuConfig->has_lock($conf, 'suspended');
        my $has_backup_lock = PVE::QemuConfig->has_lock($conf, 'backup');

        my $running = check_running($vmid, undef, $migrate_opts->{migratedfrom});

        # a backup is currently running, attempt to start the guest in the
        # existing QEMU instance
        return vm_resume($vmid) if $has_backup_lock && $running;

        if (!$params->{skiplock} && !$has_suspended_lock) {
            PVE::QemuConfig->check_lock($conf);
        }

        $params->{resume} = $has_suspended_lock || defined($conf->{vmstate});

        die "VM $vmid already running\n" if $running;

        my $storagemap = $migrate_opts->{storagemap};
        if ($storagemap) {
            my $replicated = $migrate_opts->{replicated_volumes};
            my $disks = vm_migrate_get_nbd_disks($storecfg, $conf, $replicated);
            my $nbd = vm_migrate_alloc_nbd_disks($storecfg, $vmid, $disks, $storagemap);
            $migrate_opts->{nbd} = $nbd;

            # start with the drive strings of the target volumes
            for my $opt (keys %$nbd) {
                $conf->{$opt} = $nbd->{$opt}->{drivestr};
            }
        }

        return vm_start_nolock($storecfg, $vmid, $conf, $params, $migrate_opts);
    };

    return PVE::QemuConfig->lock_config($vmid, $start_locked);
}
5603
5604
5605 # params:
5606 # statefile => 'tcp', 'unix' for migration or path/volid for RAM state
5607 # skiplock => 0/1, skip checking for config lock
5608 # skiptemplate => 0/1, skip checking whether VM is template
5609 # forcemachine => to force QEMU machine (rollback/migration)
5610 # forcecpu => a QEMU '-cpu' argument string to override get_cpu_options
5611 # timeout => in seconds
5612 # paused => start VM in paused state (backup)
5613 # resume => resume from hibernation
5614 # live-restore-backing => {
5615 # sata0 => {
5616 # name => blockdev-name,
5617 # blockdev => "arg to the -blockdev command instantiating device named 'name'",
5618 # },
5619 # virtio2 => ...
5620 # }
5621 # migrate_opts:
5622 # nbd => volumes for NBD exports (vm_migrate_alloc_nbd_disks)
5623 # migratedfrom => source node
5624 # spice_ticket => used for spice migration, passed via tunnel/stdin
5625 # network => CIDR of migration network
5626 # type => secure/insecure - tunnel over encrypted connection or plain-text
5627 # nbd_proto_version => int, 0 for TCP, 1 for UNIX
5628 # replicated_volumes => which volids should be re-used with bitmaps for nbd migration
5629 # offline_volumes => new volids of offline migrated disks like tpmstate and cloudinit, not yet
5630 # contained in config
5631 sub vm_start_nolock {
5632 my ($storecfg, $vmid, $conf, $params, $migrate_opts) = @_;
5633
5634 my $statefile = $params->{statefile};
5635 my $resume = $params->{resume};
5636
5637 my $migratedfrom = $migrate_opts->{migratedfrom};
5638 my $migration_type = $migrate_opts->{type};
5639
5640 my $res = {};
5641
5642 # clean up leftover reboot request files
5643 eval { clear_reboot_request($vmid); };
5644 warn $@ if $@;
5645
5646 if (!$statefile && scalar(keys %{$conf->{pending}})) {
5647 vmconfig_apply_pending($vmid, $conf, $storecfg);
5648 $conf = PVE::QemuConfig->load_config($vmid); # update/reload
5649 }
5650
5651 # don't regenerate the ISO if the VM is started as part of a live migration
5652 # this way we can reuse the old ISO with the correct config
5653 if (!$migratedfrom) {
5654 if (PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid)) {
5655 # FIXME: apply_cloudinit_config updates $conf in this case, and it would only drop
5656 # $conf->{cloudinit}, so we could just not do this?
5657 # But we do it above, so for now let's be consistent.
5658 $conf = PVE::QemuConfig->load_config($vmid); # update/reload
5659 }
5660 }
5661
5662 # override offline migrated volumes, conf is out of date still
5663 if (my $offline_volumes = $migrate_opts->{offline_volumes}) {
5664 for my $key (sort keys $offline_volumes->%*) {
5665 my $parsed = parse_drive($key, $conf->{$key});
5666 $parsed->{file} = $offline_volumes->{$key};
5667 $conf->{$key} = print_drive($parsed);
5668 }
5669 }
5670
5671 my $defaults = load_defaults();
5672
5673 # set environment variable useful inside network script
5674 # for remote migration the config is available on the target node!
5675 if (!$migrate_opts->{remote_node}) {
5676 $ENV{PVE_MIGRATED_FROM} = $migratedfrom;
5677 }
5678
5679 PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1);
5680
5681 my $forcemachine = $params->{forcemachine};
5682 my $forcecpu = $params->{forcecpu};
5683 if ($resume) {
5684 # enforce machine and CPU type on suspended vm to ensure HW compatibility
5685 $forcemachine = $conf->{runningmachine};
5686 $forcecpu = $conf->{runningcpu};
5687 print "Resuming suspended VM\n";
5688 }
5689
5690 my ($cmd, $vollist, $spice_port, $pci_devices) = config_to_command($storecfg, $vmid,
5691 $conf, $defaults, $forcemachine, $forcecpu, $params->{'live-restore-backing'});
5692
5693 my $migration_ip;
5694 my $get_migration_ip = sub {
5695 my ($nodename) = @_;
5696
5697 return $migration_ip if defined($migration_ip);
5698
5699 my $cidr = $migrate_opts->{network};
5700
5701 if (!defined($cidr)) {
5702 my $dc_conf = PVE::Cluster::cfs_read_file('datacenter.cfg');
5703 $cidr = $dc_conf->{migration}->{network};
5704 }
5705
5706 if (defined($cidr)) {
5707 my $ips = PVE::Network::get_local_ip_from_cidr($cidr);
5708
5709 die "could not get IP: no address configured on local " .
5710 "node for network '$cidr'\n" if scalar(@$ips) == 0;
5711
5712 die "could not get IP: multiple addresses configured on local " .
5713 "node for network '$cidr'\n" if scalar(@$ips) > 1;
5714
5715 $migration_ip = @$ips[0];
5716 }
5717
5718 $migration_ip = PVE::Cluster::remote_node_ip($nodename, 1)
5719 if !defined($migration_ip);
5720
5721 return $migration_ip;
5722 };
5723
5724 if ($statefile) {
5725 if ($statefile eq 'tcp') {
5726 my $migrate = $res->{migrate} = { proto => 'tcp' };
5727 $migrate->{addr} = "localhost";
5728 my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg');
5729 my $nodename = nodename();
5730
5731 if (!defined($migration_type)) {
5732 if (defined($datacenterconf->{migration}->{type})) {
5733 $migration_type = $datacenterconf->{migration}->{type};
5734 } else {
5735 $migration_type = 'secure';
5736 }
5737 }
5738
5739 if ($migration_type eq 'insecure') {
5740 $migrate->{addr} = $get_migration_ip->($nodename);
5741 $migrate->{addr} = "[$migrate->{addr}]" if Net::IP::ip_is_ipv6($migrate->{addr});
5742 }
5743
5744 # see #4501: port reservation should be done close to usage - tell QEMU where to listen
5745 # via QMP later
5746 push @$cmd, '-incoming', 'defer';
5747 push @$cmd, '-S';
5748
5749 } elsif ($statefile eq 'unix') {
5750 # should be the default for secure migrations, as an SSH TCP forward
5751 # tunnel is not deterministically ready in time and regularly fails
5752 # to set up, so use UNIX socket forwards
5753 my $migrate = $res->{migrate} = { proto => 'unix' };
5754 $migrate->{addr} = "/run/qemu-server/$vmid.migrate";
5755 unlink $migrate->{addr};
5756
5757 $migrate->{uri} = "unix:$migrate->{addr}";
5758 push @$cmd, '-incoming', $migrate->{uri};
5759 push @$cmd, '-S';
5760
5761 } elsif (-e $statefile) {
5762 push @$cmd, '-loadstate', $statefile;
5763 } else {
5764 my $statepath = PVE::Storage::path($storecfg, $statefile);
5765 push @$vollist, $statefile;
5766 push @$cmd, '-loadstate', $statepath;
5767 }
5768 } elsif ($params->{paused}) {
5769 push @$cmd, '-S';
5770 }
5771
5772 my $memory = get_current_memory($conf->{memory});
5773 my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $memory, $resume);
5774
5775 my $pci_reserve_list = [];
5776 for my $device (values $pci_devices->%*) {
5777 next if $device->{mdev}; # we don't reserve for mdev devices
5778 push $pci_reserve_list->@*, map { $_->{id} } $device->{ids}->@*;
5779 }
5780
5781 # reserve all PCI IDs before actually doing anything with them
5782 PVE::QemuServer::PCI::reserve_pci_usage($pci_reserve_list, $vmid, $start_timeout);
5783
5784 eval {
5785 my $uuid;
5786 for my $id (sort keys %$pci_devices) {
5787 my $d = $pci_devices->{$id};
5788 my ($index) = ($id =~ m/^hostpci(\d+)$/);
5789
5790 my $chosen_mdev;
5791 for my $dev ($d->{ids}->@*) {
5792 my $info = eval { PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $index, $d->{mdev}) };
5793 if ($d->{mdev}) {
5794 warn $@ if $@;
5795 $chosen_mdev = $info;
5796 last if $chosen_mdev; # if successful, we're done
5797 } else {
5798 die $@ if $@;
5799 }
5800 }
5801
5802 next if !$d->{mdev};
5803 die "could not create mediated device\n" if !defined($chosen_mdev);
5804
5805 # nvidia grid needs the uuid of the mdev as qemu parameter
5806 if (!defined($uuid) && $chosen_mdev->{vendor} =~ m/^(0x)?10de$/) {
5807 if (defined($conf->{smbios1})) {
5808 my $smbios_conf = parse_smbios1($conf->{smbios1});
5809 $uuid = $smbios_conf->{uuid} if defined($smbios_conf->{uuid});
5810 }
5811 $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $index) if !defined($uuid);
5812 }
5813 }
5814 push @$cmd, '-uuid', $uuid if defined($uuid);
5815 };
5816 if (my $err = $@) {
5817 eval { cleanup_pci_devices($vmid, $conf) };
5818 warn $@ if $@;
5819 die $err;
5820 }
5821
5822 PVE::Storage::activate_volumes($storecfg, $vollist);
5823
5824
5825 my %silence_std_outs = (outfunc => sub {}, errfunc => sub {});
5826 eval { run_command(['/bin/systemctl', 'reset-failed', "$vmid.scope"], %silence_std_outs) };
5827 eval { run_command(['/bin/systemctl', 'stop', "$vmid.scope"], %silence_std_outs) };
5828 # Issues with the above 'stop' not being fully completed are extremely rare, a very low
5829 # timeout should be more than enough here...
5830 PVE::Systemd::wait_for_unit_removed("$vmid.scope", 20);
5831
5832 my $cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{cpuunits});
5833
5834 my %run_params = (
5835 timeout => $statefile ? undef : $start_timeout,
5836 umask => 0077,
5837 noerr => 1,
5838 );
5839
5840 # when migrating, prefix QEMU output so other side can pick up any
5841 # errors that might occur and show the user
5842 if ($migratedfrom) {
5843 $run_params{quiet} = 1;
5844 $run_params{logfunc} = sub { print "QEMU: $_[0]\n" };
5845 }
5846
5847 my %systemd_properties = (
5848 Slice => 'qemu.slice',
5849 KillMode => 'process',
5850 SendSIGKILL => 0,
5851 TimeoutStopUSec => ULONG_MAX, # infinity
5852 );
5853
5854 if (PVE::CGroup::cgroup_mode() == 2) {
5855 $systemd_properties{CPUWeight} = $cpuunits;
5856 } else {
5857 $systemd_properties{CPUShares} = $cpuunits;
5858 }
5859
5860 if (my $cpulimit = $conf->{cpulimit}) {
5861 $systemd_properties{CPUQuota} = int($cpulimit * 100);
5862 }
5863 $systemd_properties{timeout} = 10 if $statefile; # setting up the scope shoul be quick
5864
5865 my $run_qemu = sub {
5866 PVE::Tools::run_fork sub {
5867 PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);
5868
5869 my $tpmpid;
5870 if ((my $tpm = $conf->{tpmstate0}) && !PVE::QemuConfig->is_template($conf)) {
5871 # start the TPM emulator so QEMU can connect on start
5872 $tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom);
5873 }
5874
5875 my $exitcode = run_command($cmd, %run_params);
5876 if ($exitcode) {
5877 if ($tpmpid) {
5878 warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n";
5879 kill 'TERM', $tpmpid;
5880 }
5881 die "QEMU exited with code $exitcode\n";
5882 }
5883 };
5884 };
5885
5886 if ($conf->{hugepages}) {
5887
5888 my $code = sub {
5889 my $hotplug_features =
5890 parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
5891 my $hugepages_topology =
5892 PVE::QemuServer::Memory::hugepages_topology($conf, $hotplug_features->{memory});
5893
5894 my $hugepages_host_topology = PVE::QemuServer::Memory::hugepages_host_topology();
5895
5896 PVE::QemuServer::Memory::hugepages_mount();
5897 PVE::QemuServer::Memory::hugepages_allocate($hugepages_topology, $hugepages_host_topology);
5898
5899 eval { $run_qemu->() };
5900 if (my $err = $@) {
5901 PVE::QemuServer::Memory::hugepages_reset($hugepages_host_topology)
5902 if !$conf->{keephugepages};
5903 die $err;
5904 }
5905
5906 PVE::QemuServer::Memory::hugepages_pre_deallocate($hugepages_topology)
5907 if !$conf->{keephugepages};
5908 };
5909 eval { PVE::QemuServer::Memory::hugepages_update_locked($code); };
5910
5911 } else {
5912 eval { $run_qemu->() };
5913 }
5914
5915 if (my $err = $@) {
5916 # deactivate volumes if start fails
5917 eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
5918 warn $@ if $@;
5919 eval { cleanup_pci_devices($vmid, $conf) };
5920 warn $@ if $@;
5921
5922 die "start failed: $err";
5923 }
5924
5925 # re-reserve all PCI IDs now that we can know the actual VM PID
5926 my $pid = PVE::QemuServer::Helpers::vm_running_locally($vmid);
5927 eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_reserve_list, $vmid, undef, $pid) };
5928 warn $@ if $@;
5929
5930 if (defined(my $migrate = $res->{migrate})) {
5931 if ($migrate->{proto} eq 'tcp') {
5932 my $nodename = nodename();
5933 my $pfamily = PVE::Tools::get_host_address_family($nodename);
5934 $migrate->{port} = PVE::Tools::next_migrate_port($pfamily);
5935 $migrate->{uri} = "tcp:$migrate->{addr}:$migrate->{port}";
5936 mon_cmd($vmid, "migrate-incoming", uri => $migrate->{uri});
5937 }
5938 print "migration listens on $migrate->{uri}\n";
5939 } elsif ($statefile) {
5940 eval { mon_cmd($vmid, "cont"); };
5941 warn $@ if $@;
5942 }
5943
5944 #start nbd server for storage migration
5945 if (my $nbd = $migrate_opts->{nbd}) {
5946 my $nbd_protocol_version = $migrate_opts->{nbd_proto_version} // 0;
5947
5948 my $migrate_storage_uri;
5949 # nbd_protocol_version > 0 for unix socket support
5950 if ($nbd_protocol_version > 0 && ($migration_type eq 'secure' || $migration_type eq 'websocket')) {
5951 my $socket_path = "/run/qemu-server/$vmid\_nbd.migrate";
5952 mon_cmd($vmid, "nbd-server-start", addr => { type => 'unix', data => { path => $socket_path } } );
5953 $migrate_storage_uri = "nbd:unix:$socket_path";
5954 $res->{migrate}->{unix_sockets} = [$socket_path];
5955 } else {
5956 my $nodename = nodename();
5957 my $localip = $get_migration_ip->($nodename);
5958 my $pfamily = PVE::Tools::get_host_address_family($nodename);
5959 my $storage_migrate_port = PVE::Tools::next_migrate_port($pfamily);
5960
5961 mon_cmd($vmid, "nbd-server-start", addr => {
5962 type => 'inet',
5963 data => {
5964 host => "${localip}",
5965 port => "${storage_migrate_port}",
5966 },
5967 });
5968 $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
5969 $migrate_storage_uri = "nbd:${localip}:${storage_migrate_port}";
5970 }
5971
5972 my $block_info = mon_cmd($vmid, "query-block");
5973 $block_info = { map { $_->{device} => $_ } $block_info->@* };
5974
5975 foreach my $opt (sort keys %$nbd) {
5976 my $drivestr = $nbd->{$opt}->{drivestr};
5977 my $volid = $nbd->{$opt}->{volid};
5978
5979 my $block_node = $block_info->{"drive-$opt"}->{inserted}->{'node-name'};
5980
5981 mon_cmd(
5982 $vmid,
5983 "block-export-add",
5984 id => "drive-$opt",
5985 'node-name' => $block_node,
5986 writable => JSON::true,
5987 type => "nbd",
5988 name => "drive-$opt", # NBD export name
5989 );
5990
5991 my $nbd_uri = "$migrate_storage_uri:exportname=drive-$opt";
5992 print "storage migration listens on $nbd_uri volume:$drivestr\n";
5993 print "re-using replicated volume: $opt - $volid\n"
5994 if $nbd->{$opt}->{replicated};
5995
5996 $res->{drives}->{$opt} = $nbd->{$opt};
5997 $res->{drives}->{$opt}->{nbd_uri} = $nbd_uri;
5998 }
5999 }
6000
6001 if ($migratedfrom) {
6002 eval {
6003 set_migration_caps($vmid);
6004 };
6005 warn $@ if $@;
6006
6007 if ($spice_port) {
6008 print "spice listens on port $spice_port\n";
6009 $res->{spice_port} = $spice_port;
6010 if ($migrate_opts->{spice_ticket}) {
6011 mon_cmd($vmid, "set_password", protocol => 'spice', password =>
6012 $migrate_opts->{spice_ticket});
6013 mon_cmd($vmid, "expire_password", protocol => 'spice', time => "+30");
6014 }
6015 }
6016
6017 } else {
6018 mon_cmd($vmid, "balloon", value => $conf->{balloon}*1024*1024)
6019 if !$statefile && $conf->{balloon};
6020
6021 foreach my $opt (keys %$conf) {
6022 next if $opt !~ m/^net\d+$/;
6023 my $nicconf = parse_net($conf->{$opt});
6024 qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down};
6025 }
6026 add_nets_bridge_fdb($conf, $vmid);
6027 }
6028
6029 if (!defined($conf->{balloon}) || $conf->{balloon}) {
6030 eval {
6031 mon_cmd(
6032 $vmid,
6033 'qom-set',
6034 path => "machine/peripheral/balloon0",
6035 property => "guest-stats-polling-interval",
6036 value => 2
6037 );
6038 };
6039 log_warn("could not set polling interval for ballooning - $@") if $@;
6040 }
6041
6042 if ($resume) {
6043 print "Resumed VM, removing state\n";
6044 if (my $vmstate = $conf->{vmstate}) {
6045 PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
6046 PVE::Storage::vdisk_free($storecfg, $vmstate);
6047 }
6048 delete $conf->@{qw(lock vmstate runningmachine runningcpu)};
6049 PVE::QemuConfig->write_config($vmid, $conf);
6050 }
6051
6052 PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-start');
6053
6054 my ($current_machine, $is_deprecated) =
6055 PVE::QemuServer::Machine::get_current_qemu_machine($vmid);
6056 if ($is_deprecated) {
6057 log_warn(
6058 "current machine version '$current_machine' is deprecated - see the documentation and ".
6059 "change to a newer one",
6060 );
6061 }
6062
6063 return $res;
6064 }
6065
# Return the QEMU command line for VM $vmid as a single string.
#
# If $snapname is given, the configuration stored in that snapshot is used instead of the
# current one, and the snapshot's 'runningmachine' / 'runningcpu' values are passed on as
# forced machine and CPU overrides. Dies if the named snapshot does not exist.
sub vm_commandline {
    my ($storecfg, $vmid, $snapname) = @_;

    my $vm_conf = PVE::QemuConfig->load_config($vmid);
    my %forced;

    if ($snapname) {
        my $snap_conf = $vm_conf->{snapshots}->{$snapname}
            // die "snapshot '$snapname' does not exist\n";

        # machine or CPU overrides recorded in the snapshot
        @forced{qw(machine cpu)} = $snap_conf->@{qw(runningmachine runningcpu)};

        # keep file digest for API
        $snap_conf->{digest} = $vm_conf->{digest};

        $vm_conf = $snap_conf;
    }

    my $defaults = load_defaults();

    # scalar assignment on purpose: only the command itself is wanted here
    my $qemu_cmd = config_to_command(
        $storecfg, $vmid, $vm_conf, $defaults, $forced{machine}, $forced{cpu});

    return PVE::Tools::cmd2string($qemu_cmd);
}
6091
# Send a 'system_reset' monitor command to VM $vmid while holding its config lock.
# The config lock check is skipped when $skiplock is set.
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $reset_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid);

        PVE::QemuConfig->check_lock($conf) unless $skiplock;

        mon_cmd($vmid, "system_reset");
    };

    PVE::QemuConfig->lock_config($vmid, $reset_locked);
}
6104
# Collect the volume IDs referenced by $conf and return them as an array reference.
# Absolute paths and IDs that do not parse to a storage ID are left out.
sub get_vm_volumes {
    my ($conf) = @_;

    my @volids;

    my $collect = sub {
        my ($volid, $attr) = @_;

        # plain file system paths are not storage volumes
        return if $volid =~ m|^/|;

        my ($sid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        push @volids, $volid;
    };

    foreach_volid($conf, $collect);

    return \@volids;
}
6122
# Clean up mediated devices of all 'hostpciN' entries in $conf and drop the PCI reservation
# held for $vmid.
sub cleanup_pci_devices {
    my ($vmid, $conf) = @_;

    for my $opt (keys $conf->%*) {
        my ($pci_index) = $opt =~ m/^hostpci(\d+)$/
            or next;

        my $mdev_uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $pci_index);
        my $device = parse_hostpci($conf->{$opt});
        next if !$device->{mdev};

        # NOTE: avoid PVE::SysFSTools::pci_cleanup_mdev_device as it requires PCI ID and we
        # don't want to break ABI just for this two liner
        my $sysfs_path = "/sys/bus/mdev/devices/$mdev_uuid";

        # some nvidia vgpu driver versions want to clean the mdevs up themselves, and error
        # out when we do it first. so wait for up to 10 seconds and then try it manually
        my $is_nvidia = $device->{ids}->[0]->[0]->{vendor} =~ m/^(0x)?10de$/;
        if ($is_nvidia && -e $sysfs_path) {
            my $waited = 0;
            while (-e $sysfs_path && $waited < 10) {
                sleep 1;
                $waited++;
            }
            print "waited $waited seconds for mediated device driver finishing clean up\n";
        }

        if (-e $sysfs_path) {
            print "actively clean up mediated device with UUID $mdev_uuid\n";
            PVE::SysFSTools::file_write("$sysfs_path/remove", "1");
        }
    }

    PVE::QemuServer::PCI::remove_pci_reservation($vmid);
}
6155
# Clean up after a stopped VM. Errors raised by any step are only reported as a warning.
#
# Unless $keepActive is set, the VM's volumes are deactivated and the TPM state volume is
# unmapped if its volume ID parses to a storage ID. After that the per-VM files below
# /var/run/qemu-server and a configured ivshmem file are removed, PCI devices are cleaned
# up and, if $apply_pending_changes is set, pending config changes are applied.
sub vm_stop_cleanup {
    my ($storecfg, $vmid, $conf, $keepActive, $apply_pending_changes) = @_;

    eval {
        unless ($keepActive) {
            my $volumes = get_vm_volumes($conf);
            PVE::Storage::deactivate_volumes($storecfg, $volumes);

            if (my $tpm_conf = $conf->{tpmstate0}) {
                my $tpm_drive = parse_drive("tpmstate0", $tpm_conf);
                my ($tpm_storeid) = PVE::Storage::parse_volume_id($tpm_drive->{file}, 1);
                PVE::Storage::unmap_volume($storecfg, $tpm_drive->{file}) if $tpm_storeid;
            }
        }

        unlink map { "/var/run/qemu-server/${vmid}.$_" } qw(mon qmp pid vnc qga);

        if (my $ivshmem_conf = $conf->{ivshmem}) {
            my $ivshmem = parse_property_string($ivshmem_fmt, $ivshmem_conf);
            my $shm_name = $ivshmem->{name} // $vmid;
            # Just delete it for now: VMs which already have it open are not affected, but
            # new VMs will get a separate one. If this becomes an issue we either add some
            # sort of ref-counting or a "don't delete on stop" flag to the ivshmem format.
            unlink '/dev/shm/pve-shm-' . $shm_name;
        }

        cleanup_pci_devices($vmid, $conf);

        vmconfig_apply_pending($vmid, $conf, $storecfg) if $apply_pending_changes;
    };
    warn $@ if $@; # avoid errors - just warn
}
6193
# Stop or shut down a running VM. Call only in locked context.
#
# Returns immediately if the VM is not running. Unless $nocheck is set, the config is loaded,
# its lock is checked (unless $skiplock), a shutdown timeout may be taken from the 'startup'
# option and the 'pre-stop' hookscript is executed.
#
# The VM is asked to power down ($shutdown) or quit. If that fails or times out, the process
# gets SIGTERM when $force is set (otherwise this dies), and SIGKILL if it is still running
# 10 seconds later. vm_stop_cleanup() is only called when a config was loaded.
sub _do_vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive) = @_;

    my $pid = check_running($vmid, $nocheck)
        or return;

    my $conf;
    if (!$nocheck) {
        $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->check_lock($conf) unless $skiplock;

        if (!defined($timeout) && $shutdown && $conf->{startup}) {
            my $startup = PVE::JSONSchema::pve_parse_startup_order($conf->{startup});
            $timeout = $startup->{down} if $startup->{down};
        }

        PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-stop');
    }

    eval {
        if (!$shutdown) {
            mon_cmd($vmid, "quit");
        } elsif (defined($conf) && get_qga_key($conf, 'enabled')) {
            mon_cmd($vmid, "guest-shutdown", timeout => $timeout);
        } else {
            mon_cmd($vmid, "system_powerdown");
        }
    };
    my $stop_err = $@;

    # Poll once per second for up to $limit seconds; true if the VM went away in time.
    my $exited_within = sub {
        my ($limit) = @_;

        my $elapsed = 0;
        while (($elapsed < $limit) && check_running($vmid, $nocheck)) {
            $elapsed++;
            sleep 1;
        }

        return $elapsed < $limit;
    };

    if ($stop_err) {
        if (!check_running($vmid, $nocheck)) {
            warn "Unexpected: VM shutdown command failed, but VM not running anymore..\n";
            return;
        }

        die "VM quit/powerdown failed\n" if !$force;

        warn "VM quit/powerdown failed - terminating now with SIGTERM\n";
        kill 15, $pid;
    } else {
        $timeout //= 60;

        if ($exited_within->($timeout)) {
            vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
            return;
        }

        die "VM quit/powerdown failed - got timeout\n" if !$force;

        warn "VM still running - terminating now with SIGTERM\n";
        kill 15, $pid;
    }

    # wait again, this time for the SIGTERM to take effect
    if (!$exited_within->(10)) {
        warn "VM still running - terminating now with SIGKILL\n";
        kill 9, $pid;
        sleep 1;
    }

    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
}
6275
# Stop VM $vmid.
#
# Note: use $nocheck to skip tests if VM configuration file exists.
# We need that when migrating VMs to other nodes (files already moved).
# Note: we set $keepActive in vzdump stop mode - volumes need to stay active.
#
# With $migratedfrom set, the process is sent SIGTERM directly and cleanup runs against the
# config loaded for that node, without taking the config lock. Otherwise _do_vm_stop() is
# run under the config lock. $force defaults to 1 for a plain stop (no $shutdown).
sub vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive, $migratedfrom) = @_;

    $force //= 1 if !$shutdown;

    unless ($migratedfrom) {
        return PVE::QemuConfig->lock_config($vmid, sub {
            _do_vm_stop($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive);
        });
    }

    if (my $pid = check_running($vmid, $nocheck, $migratedfrom)) {
        kill 15, $pid;
    }

    my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);
    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 0);

    return;
}
6296
# Reboot VM $vmid: record a reboot request and shut the VM down under the config lock.
# If anything fails, the reboot request is cleared again and the error is re-raised.
sub vm_reboot {
    my ($vmid, $timeout) = @_;

    my $reboot_locked = sub {
        eval {
            # only reboot if running, as qmeventd starts it again on a stop event
            if (check_running($vmid)) {
                create_reboot_request($vmid);

                my $storecfg = PVE::Storage::config();
                _do_vm_stop($storecfg, $vmid, undef, undef, $timeout, 1);
            }
        };
        if (my $err = $@) {
            # avoid that the next normal shutdown will be confused for a reboot
            clear_reboot_request($vmid);
            die $err;
        }
    };

    PVE::QemuConfig->lock_config($vmid, $reboot_locked);
}
6319
# Suspend VM $vmid, either by pausing it or, with $includestate, by saving its state to a
# volume and quitting QEMU.
#
# Note: if using the $statestorage parameter, the caller has to check privileges. When no
# state storage is passed, one is looked up and, for non-CLI callers, the current user's
# 'Datastore.AllocateSpace' permission on it is checked.
#
# Suspending to disk is refused while a backup lock is held.
sub vm_suspend {
    my ($vmid, $skiplock, $includestate, $statestorage) = @_;

    my ($conf, $storecfg, $vmstate, $statepath);

    my $prepare_suspend = sub {
        $conf = PVE::QemuConfig->load_config($vmid);

        my $backup_running = PVE::QemuConfig->has_lock($conf, 'backup');
        PVE::QemuConfig->check_lock($conf) unless $skiplock || $backup_running;

        die "cannot suspend to disk during backup\n"
            if $backup_running && $includestate;

        if (!$includestate) {
            mon_cmd($vmid, "stop");
        } else {
            $conf->{lock} = 'suspending';
            my $date = strftime("%Y-%m-%d", localtime(time()));
            $storecfg = PVE::Storage::config();

            if (!$statestorage) {
                $statestorage = find_vmstate_storage($conf, $storecfg);

                # check permissions for the storage
                my $rpcenv = PVE::RPCEnvironment::get();
                if ($rpcenv->{type} ne 'cli') {
                    my $authuser = $rpcenv->get_user();
                    $rpcenv->check($authuser, "/storage/$statestorage", ['Datastore.AllocateSpace']);
                }
            }

            $vmstate = PVE::QemuConfig->__snapshot_save_vmstate(
                $vmid, $conf, "suspend-$date", $storecfg, $statestorage, 1);
            $statepath = PVE::Storage::path($storecfg, $vmstate);
            PVE::QemuConfig->write_config($vmid, $conf);
        }
    };

    PVE::QemuConfig->lock_config($vmid, $prepare_suspend);

    if ($includestate) {
        # save vm state
        PVE::Storage::activate_volumes($storecfg, [$vmstate]);

        eval {
            set_migration_caps($vmid, 1);
            mon_cmd($vmid, "savevm-start", statefile => $statepath);

            # poll until the state save is reported as finished (or broken)
            while (1) {
                my $state = mon_cmd($vmid, "query-savevm");
                my $status = $state->{status}
                    or die "savevm not active\n";

                if ($status eq 'active') {
                    sleep(1);
                    next;
                }
                if ($status eq 'completed') {
                    print "State saved, quitting\n";
                    last;
                }

                die "query-savevm failed with error '$state->{error}'\n"
                    if $status eq 'failed' && $state->{error};
                die "query-savevm returned status '$state->{status}'\n";
            }
        };
        my $err = $@;

        my $finish_suspend = sub {
            $conf = PVE::QemuConfig->load_config($vmid);

            if ($err) {
                # cleanup, but leave suspending lock, to indicate something went wrong
                eval {
                    mon_cmd($vmid, "savevm-end");
                    PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
                    PVE::Storage::vdisk_free($storecfg, $vmstate);
                    delete $conf->@{qw(vmstate runningmachine runningcpu)};
                    PVE::QemuConfig->write_config($vmid, $conf);
                };
                warn $@ if $@;
                die $err;
            }

            die "lock changed unexpectedly\n"
                if !PVE::QemuConfig->has_lock($conf, 'suspending');

            mon_cmd($vmid, "quit");
            $conf->{lock} = 'suspended';
            PVE::QemuConfig->write_config($vmid, $conf);
        };

        PVE::QemuConfig->lock_config($vmid, $finish_suspend);
    }
}
6414
# Resume VM $vmid under its config lock.
#
# $nocheck is set when called as part of a migration - in this context the location of the
# config file (source or target node) is not deterministic, since migration cannot wait for
# pmxcfs to process the rename. The config is then tried on this node, on the node listed in
# the cluster VM list, and finally on this node again after a cfs update.
#
# Nothing is done if the VM already reports status 'running'. A 'suspended' VM is woken with
# 'system_wakeup', anything else is continued with 'cont'; a VM in status 'shutdown' is reset
# first.
sub vm_resume {
    my ($vmid, $skiplock, $nocheck) = @_;

    my $resume_locked = sub {
        my $res = mon_cmd($vmid, 'query-status');

        my $conf;
        if (!$nocheck) {
            $conf = PVE::QemuConfig->load_config($vmid);
        } else {
            $conf = eval { PVE::QemuConfig->load_config($vmid) }; # try on target node
            if ($@) {
                my $vmlist = PVE::Cluster::get_vmlist();
                if (exists($vmlist->{ids}->{$vmid})) {
                    my $node = $vmlist->{ids}->{$vmid}->{node};
                    $conf = eval { PVE::QemuConfig->load_config($vmid, $node) }; # try on source node
                }
                if (!$conf) {
                    PVE::Cluster::cfs_update(); # vmlist was wrong, invalidate cache
                    $conf = PVE::QemuConfig->load_config($vmid); # last try on target node again
                }
            }
        }

        my $resume_cmd = 'cont';
        my $needs_reset = 0;
        if (my $status = $res->{status}) {
            return if $status eq 'running'; # job done, go home
            $resume_cmd = 'system_wakeup' if $status eq 'suspended';
            $needs_reset = 1 if $status eq 'shutdown';
        }

        if (!$nocheck && !$skiplock && !PVE::QemuConfig->has_lock($conf, 'backup')) {
            PVE::QemuConfig->check_lock($conf);
        }

        # required if a VM shuts down during a backup and we get a resume
        # request before the backup finishes for example
        mon_cmd($vmid, "system_reset") if $needs_reset;

        add_nets_bridge_fdb($conf, $vmid) if $resume_cmd eq 'cont';

        mon_cmd($vmid, $resume_cmd);
    };

    PVE::QemuConfig->lock_config($vmid, $resume_locked);
}
6465
# Send the key (combination) $key to VM $vmid while holding its config lock.
# Dies with the monitor's answer if that answer is not empty. $skiplock is accepted but not
# used here.
sub vm_sendkey {
    my ($vmid, $skiplock, $key) = @_;

    my $sendkey_locked = sub {
        # the config is loaded, but not otherwise used in here
        my $conf = PVE::QemuConfig->load_config($vmid);

        # there is no qmp command, so we use the human monitor command
        my $answer = PVE::QemuServer::Monitor::hmp_cmd($vmid, "sendkey $key");
        die $answer if $answer ne '';
    };

    PVE::QemuConfig->lock_config($vmid, $sendkey_locked);
}
6478
# Check that $authuser may use the bridge, tag and trunks of every 'netN' entry in $conf.
# 'root@pam' is always allowed. Returns 1; failures come from the vnet access check itself.
sub check_bridge_access {
    my ($rpcenv, $authuser, $conf) = @_;

    return 1 if $authuser eq 'root@pam';

    my @net_opts = sort grep { m/^net\d+$/ } keys $conf->%*;

    for my $opt (@net_opts) {
        my $net = parse_net($conf->{$opt});
        PVE::GuestHelpers::check_vnet_access(
            $rpcenv, $authuser, $net->{bridge}, $net->{tag}, $net->{trunks});
    }

    return 1;
}
6492
# Check that $user may use the USB and PCI passthrough entries found in $conf.
#
# Entries with a 'host' may only be used by 'root@pam' (a USB host of 'spice' is allowed for
# everybody). Entries with a 'mapping' need 'Mapping.Use' on that mapping. An entry with
# neither of the two is an error.
sub check_mapping_access {
    my ($rpcenv, $user, $conf) = @_;

    my $is_root = $user eq 'root@pam';

    for my $opt (keys $conf->%*) {
        if ($opt =~ m/^usb\d+$/) {
            my $usb = PVE::JSONSchema::parse_property_string('pve-qm-usb', $conf->{$opt});

            if (my $host = $usb->{host}) {
                die "only root can set '$opt' config for real devices\n"
                    unless $host =~ m/^spice$/i || $is_root;
            } elsif ($usb->{mapping}) {
                $rpcenv->check_full($user, "/mapping/usb/$usb->{mapping}", ['Mapping.Use']);
            } else {
                die "either 'host' or 'mapping' must be set.\n";
            }
        } elsif ($opt =~ m/^hostpci\d+$/) {
            my $pci = PVE::JSONSchema::parse_property_string('pve-qm-hostpci', $conf->{$opt});

            if ($pci->{host}) {
                die "only root can set '$opt' config for non-mapped devices\n" unless $is_root;
            } elsif ($pci->{mapping}) {
                $rpcenv->check_full($user, "/mapping/pci/$pci->{mapping}", ['Mapping.Use']);
            } else {
                die "either 'host' or 'mapping' must be set.\n";
            }
        }
    }
}
6519
# Run the bridge access check followed by the device mapping access check for $user on the
# config that is about to be restored.
sub check_restore_permissions {
    my ($rpcenv, $user, $conf) = @_;

    my @check_args = ($rpcenv, $user, $conf);

    check_bridge_access(@check_args);
    return check_mapping_access(@check_args);
}
# vzdump restore implementation
6527
# Return the name of the first entry listed by 'tar tf' for $archive.
# Dies if $archive is not a plain file, if tar cannot be started, or if no entry is listed.
sub tar_archive_read_firstfile {
    my ($archive) = @_;

    -f $archive
        or die "ERROR: file '$archive' does not exist\n";

    # try to detect archive type first
    my $tar_pid = open(my $listing, '-|', 'tar', 'tf', $archive)
        or die "unable to open file '$archive'\n";

    # only the first line is of interest, so tar is terminated right after reading it
    my $first_entry = readline($listing);
    kill 15, $tar_pid;
    close $listing;

    die "ERROR: archive contaions no data\n" unless $first_entry;
    chomp $first_entry;

    return $first_entry;
}
6545
# Remove the temporary volumes recorded in $statfile after a failed tar restore.
#
# Every line of $statfile is expected to end in 'vzdump:<name>:<volid>'. A <volid> starting
# with '/' is removed with unlink, anything else is freed through PVE::Storage::vdisk_free
# using $storecfg. Progress and problems are reported on STDERR; a failure for one volume
# does not stop the cleanup of the remaining ones. Nothing is cleaned up if $statfile cannot
# be opened.
sub tar_restore_cleanup {
    my ($storecfg, $statfile) = @_;

    print STDERR "starting cleanup\n";

    if (my $fd = IO::File->new($statfile, "r")) {
        while (defined(my $line = <$fd>)) {
            if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                my $volid = $2;
                eval {
                    if ($volid =~ m|^/|) {
                        # parentheses and low-precedence 'or' are required here: with
                        # 'unlink $volid || die ...' the die belongs to unlink's argument
                        # and a failed unlink goes unnoticed
                        unlink($volid) or die "unlink failed - $!\n";
                    } else {
                        PVE::Storage::vdisk_free($storecfg, $volid);
                    }
                    print STDERR "temporary volume '$volid' sucessfuly removed\n";
                };
                print STDERR "unable to cleanup '$volid' - $@" if $@;
            } else {
                print STDERR "unable to parse line in statfile - $line";
            }
        }
        $fd->close();
    }
}
6571
# Restore VM $vmid from $archive, dispatching on the archive format.
#
# An $archive of '-' is handed to the VMA restore directly. Otherwise the format comes from
# $opts->{format} or, if that is not set, from the archive info; 'tar' goes to the tar
# restore, everything else to the VMA restore together with the detected compression.
sub restore_file_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    my @restore_args = ($archive, $vmid, $user, $opts);

    return restore_vma_archive(@restore_args) if $archive eq '-';

    my $info = PVE::Storage::archive_info($archive);
    my $format = $opts->{format} // $info->{format};
    my $comp = $info->{compression};

    # try to detect archive format
    return restore_tar_archive(@restore_args) if $format eq 'tar';

    return restore_vma_archive(@restore_args, $comp);
}
6589
# Helper to remove disks that will not be used after restore.
#
# Volumes of $oldconf that are owned by $vmid and whose drive key is part of $virtdev_hash
# are freed. Owned volumes that are not restored are kept, but the snapshots listed in
# $oldconf are deleted from them, and the snapshots' vmstate volumes are freed. Errors are
# only reported as warnings.
my $restore_cleanup_oldconf = sub {
    my ($storecfg, $vmid, $oldconf, $virtdev_hash) = @_;

    my %kept_volids;

    my $handle_drive = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return unless $volid && $volid !~ m|^/|;

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return unless $path && $owner && ($owner == $vmid);

        # Note: only delete disk we want to restore
        # other volumes will become unused
        if (!$virtdev_hash->{$ds}) {
            $kept_volids{$volid} = 1;
            return;
        }

        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        warn $@ if $@;
    };

    PVE::QemuConfig->foreach_volume($oldconf, $handle_drive);

    # after the restore we have no snapshots anymore
    for my $snapname (keys $oldconf->{snapshots}->%*) {
        if (my $state_volid = $oldconf->{snapshots}->{$snapname}->{vmstate}) {
            eval { PVE::Storage::vdisk_free($storecfg, $state_volid); };
            warn $@ if $@;
        }

        for my $volid (keys %kept_volids) {
            eval { PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snapname); };
            warn $@ if $@;
        }
    }
};
6635
# Helper to parse vzdump backup device hints
#
# $rpcenv: Environment, used to check storage permissions
# $user: User ID, to check storage permissions
# $storecfg: Storage configuration
# $fh: the file handle for reading the configuration
# $devinfo: should contain device sizes for all backed-up devices
# $options: backup options (pool, default storage)
#
# Return: $virtdev_hash, updates $devinfo (add devname, virtdev, format, storeid)
my $parse_backup_hints = sub {
    my ($rpcenv, $user, $storecfg, $fh, $devinfo, $options) = @_;

    # assert that an image can be allocated on the storage by $user
    my $assert_storage_usable = sub {
        my ($storeid, $scfg) = @_;
        die "Content type 'images' is not available on storage '$storeid'\n"
            if !$scfg->{content}->{images};
        $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace'])
            if $user ne 'root@pam';
    };

    my %virtdevs;

    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\#qmdump\#map:(\S+):(\S+):(\S*):(\S*):$/) {
            my ($virtdev, $devname, $storeid, $format) = ($1, $2, $3, $4);

            my $info = $devinfo->{$devname}
                or die "archive does not contain data for drive '$virtdev'\n";

            # an explicitly requested storage wins, 'local' is the last resort
            $storeid = $options->{storage} if defined($options->{storage});
            $storeid ||= 'local';
            $format ||= 'raw';

            $info->@{qw(devname virtdev format storeid)} = ($devname, $virtdev, $format, $storeid);

            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $assert_storage_usable->($storeid, $scfg); # permission and content type check

            $virtdevs{$virtdev} = $info;
            next;
        }

        my ($virtdev, $drive_str) = $line =~ m/^((?:ide|sata|scsi)\d+):\s*(.*)\s*$/
            or next;

        my $drive = parse_drive($virtdev, $drive_str);
        next unless drive_is_cloudinit($drive);

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        $storeid = $options->{storage} if defined($options->{storage});
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        my $format = qemu_img_format($scfg, $volname); # has 'raw' fallback

        $assert_storage_usable->($storeid, $scfg); # permission and content type check

        $virtdevs{$virtdev} = {
            format => $format,
            storeid => $storeid,
            size => PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE,
            is_cloudinit => 1,
        };
    }

    return \%virtdevs;
};
6703
# Helper to allocate and activate all volumes required for a restore
#
# $storecfg: Storage configuration
# $virtdev_hash: as returned by parse_backup_hints()
# $vmid: ID of the VM the volumes are allocated for
#
# Each entry of $virtdev_hash gets its 'volid' set to the newly allocated volume, and its
# 'format' is replaced by the storage's default format if the storage does not list the
# requested one as valid.
#
# Returns: { $virtdev => $volid }
my $restore_allocate_devices = sub {
    my ($storecfg, $virtdev_hash, $vmid) = @_;

    my $map = {};
    foreach my $virtdev (sort keys %$virtdev_hash) {
        my $d = $virtdev_hash->{$virtdev};
        my $alloc_size = int(($d->{size} + 1024 - 1)/1024); # size / 1024, rounded up
        my $storeid = $d->{storeid};
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        # test if requested format is supported
        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        my $supported = grep { $_ eq $d->{format} } @$validFormats;
        $d->{format} = $defFormat if !$supported;

        my $name;
        if ($d->{is_cloudinit}) {
            $name = "vm-$vmid-cloudinit";
            # storages configured with a 'path' get the format appended as file extension
            $name .= ".$d->{format}" if $scfg->{path};
        }

        my $volid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $vmid, $d->{format}, $name, $alloc_size);

        print STDERR "new volume ID is '$volid'\n";
        $d->{volid} = $volid;

        PVE::Storage::activate_volumes($storecfg, [$volid]);

        $map->{$virtdev} = $volid;
    }

    return $map;
};
6747
# Translate one line of a backed-up VM config into the text to write to the restored config.
#
# $cookie: state shared between calls; 'netcount' is the index for converted 'vlanN' entries
# $map: { $virtdev => $volid } of restored volumes
# $line: the config line, including its trailing newline
# $unique: if set, MAC addresses and the smbios1 UUID are regenerated
#
# Backup hint lines, 'lock', 'unusedN' and 'parent' entries are dropped (empty string).
# Drives excluded from the backup are commented out, restored drives point to their new
# volume, and a vmgenid other than '0' is always regenerated. Other lines pass unchanged.
sub restore_update_config_line {
    my ($cookie, $map, $line, $unique) = @_;

    my @dropped = (qr/^\#qmdump\#/, qr/^\#vzdump\#/, qr/^lock:/, qr/^unused\d+:/, qr/^parent:/);
    for my $pattern (@dropped) {
        return '' if $line =~ $pattern;
    }

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');

    if ($line =~ m/^(vlan(\d+)):\s*(\S+)\s*$/) {
        # try to convert old 1.X settings
        my ($id, $ind, $ethcfg) = ($1, $2, $3);

        my $converted = '';
        for my $devconfig (PVE::Tools::split_list($ethcfg)) {
            my ($model, $macaddr) = split(/\=/, $devconfig);
            $macaddr = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if !$macaddr || $unique;

            my $netstr = print_net({
                model => $model,
                bridge => "vmbr$ind",
                macaddr => $macaddr,
            });

            $converted .= "net$cookie->{netcount}: $netstr\n";
            $cookie->{netcount}++;
        }
        return $converted;
    }

    if ($unique && $line =~ m/^(net\d+):\s*(\S+)\s*$/) {
        my ($id, $netstr) = ($1, $2);
        my $net = parse_net($netstr);
        $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if $net->{macaddr};
        $netstr = print_net($net);
        return "$id: $netstr\n";
    }

    if ($line =~ m/^((ide|scsi|virtio|sata|efidisk|tpmstate)\d+):\s*(\S+)\s*$/) {
        my ($virtdev, $value) = ($1, $3);
        my $di = parse_drive($virtdev, $value);

        # drives that were explicitly excluded from the backup get commented out
        return "#$line" if defined($di->{backup}) && !$di->{backup};
        return $line if !$map->{$virtdev};

        delete $di->{format}; # format can change on restore
        $di->{file} = $map->{$virtdev};
        $value = print_drive($di);
        return "$virtdev: $value\n";
    }

    if ($line =~ m/^vmgenid: (.*)/) {
        my $vmgenid = $1;
        # always generate a new vmgenid if there was a valid one setup
        $vmgenid = generate_uuid() if $vmgenid ne '0';
        return "vmgenid: $vmgenid\n";
    }

    if ($unique && $line =~ m/^(smbios1: )(.*)/) {
        my ($key_prefix, $old_value) = ($1, $2);

        my ($uuid, $uuid_str);
        UUID::generate($uuid);
        UUID::unparse($uuid, $uuid_str);

        my $smbios1 = parse_smbios1($old_value);
        $smbios1->{uuid} = $uuid_str;
        return $key_prefix . print_smbios1($smbios1) . "\n";
    }

    return $line;
}
6816
# Deactivate every volume recorded in $virtdev_hash (entries without a
# 'volid' are skipped). Errors are only printed to STDERR, never propagated.
my $restore_deactivate_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    my @volids = grep { $_ } map { $_->{volid} } values %$virtdev_hash;

    eval { PVE::Storage::deactivate_volumes($storecfg, \@volids); };
    my $err = $@;
    print STDERR $err if $err;
};
6828
# Free every volume recorded in $virtdev_hash (entries without a 'volid' are
# skipped). Used to clean up after a failed restore: a failure to free one
# volume is reported on STDERR and does not stop the cleanup of the others.
my $restore_destroy_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    for my $dev (values $virtdev_hash->%*) {
        my $volid = $dev->{volid} or next;
        eval {
            PVE::Storage::vdisk_free($storecfg, $volid);
            # spelling of the status message fixed (was "sucessfuly")
            print STDERR "temporary volume '$volid' successfully removed\n";
        };
        print STDERR "unable to cleanup '$volid' - $@" if $@;
    }
};
6841
# Parse the raw config text taken from a backup and overlay it with the
# caller supplied overrides.
#
# Parameters:
#   $filename        - config file name handed to parse_vm_config()
#   $backup_conf_raw - raw config text
#   $override_conf   - hash ref; each of its keys replaces the parsed value
#
# Returns the merged config hash.
sub restore_merge_config {
    my ($filename, $backup_conf_raw, $override_conf) = @_;

    my $merged = parse_vm_config($filename, $backup_conf_raw);

    foreach my $option (keys %$override_conf) {
        $merged->{$option} = $override_conf->{$option};
    }

    return $merged;
}
6852
# List the 'images' volumes that PVE::Storage::vdisk_list() reports for $vmid
# and index them by volume ID.
#
# Entries lacking a volume ID or a (non-zero) size are ignored. Each returned
# entry additionally gets a 'path' key, as resolved by PVE::Storage::path().
#
# Returns a hash ref: volume ID => volume info hash.
sub scan_volids {
    my ($cfg, $vmid) = @_;

    my $images_by_storage = PVE::Storage::vdisk_list($cfg, undef, $vmid, undef, 'images');

    my $volid_hash = {};
    for my $storeid (keys %$images_by_storage) {
        for my $image (@{$images_by_storage->{$storeid}}) {
            my $volid = $image->{volid};
            next unless $volid && $image->{size};

            $image->{path} = PVE::Storage::path($cfg, $volid);
            $volid_hash->{$volid} = $image;
        }
    }

    return $volid_hash;
}
6869
# Bring the disk entries of a VM config in line with the volumes found on
# the storages.
#
# Three passes are made over $conf (which is modified in place):
#   1. the size of every used disk is refreshed from $volid_hash
#   2. 'unusedN' entries whose volume is already referenced are removed
#   3. volumes from $volid_hash that are referenced nowhere are added as
#      unused volumes (VM state volumes are left alone)
#
# Parameters:
#   $vmid       - ID of the VM, used for messages and state volume detection
#   $conf       - VM config hash
#   $volid_hash - hash ref: volume ID => info hash with 'path' and 'size'
#
# Returns a true value if $conf was changed, undef otherwise.
sub update_disk_config {
    my ($vmid, $conf, $volid_hash) = @_;

    my $changes;
    my $prefix = "VM $vmid";

    # Volumes seen in the config so far (used and unused disks).
    # Note: it is allowed to define multiple storages with same path (alias),
    # so both the volume ID and the real path have to be tracked (two
    # different volume IDs can point to the same path).
    my %volid_seen;
    my %path_seen;

    # update size info
    my $refresh_used_disk = sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;
        my $volume = $volid_hash->{$volid};

        # mark volid as "in-use" for next step
        $volid_seen{$volid} = 1;
        $path_seen{$volume->{path}} = 1 if $volume && $volume->{path};

        return if drive_is_cdrom($drive);
        return if !$volume;

        my ($updated, $msg) =
            PVE::QemuServer::Drive::update_disksize($drive, $volume->{size});
        if (defined($updated)) {
            $changes = 1;
            $conf->{$opt} = print_drive($updated);
            print "$prefix ($opt): $msg\n";
        }
    };
    PVE::QemuConfig->foreach_volume($conf, $refresh_used_disk);

    # remove 'unusedX' entry if volume is used
    my $drop_referenced_unused = sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $known = $volid_hash->{$volid};
        my $path = $known ? $known->{path} : undef;

        if ($volid_seen{$volid} || ($path && $path_seen{$path})) {
            print "$prefix remove entry '$opt', its volume '$volid' is in use\n";
            $changes = 1;
            delete $conf->{$opt};
        }

        $volid_seen{$volid} = 1;
        $path_seen{$path} = 1 if $path;
    };
    PVE::QemuConfig->foreach_unused_volume($conf, $drop_referenced_unused);

    for my $volid (sort keys %$volid_hash) {
        next if $volid =~ m/vm-$vmid-state-/;
        next if $volid_seen{$volid};

        my $path = $volid_hash->{$volid}->{path};
        next if !$path; # just to be sure
        next if $path_seen{$path};

        $changes = 1;
        my $key = PVE::QemuConfig->add_unused_volume($conf, $volid);
        print "$prefix add unreferenced volume '$volid' as '$key' to config\n";
        $path_seen{$path} = 1; # avoid to add more than once (aliases)
    }

    return $changes;
}
6943
# Rescan the storages and update the disk entries of one VM or of all VMs.
#
# Parameters:
#   $vmid   - VM to update; if undefined, every VM from config_list() is updated
#   $nolock - if true, the config is updated without taking the config lock
#   $dryrun - if true, changes are reported but the config is not written
sub rescan {
    my ($vmid, $nolock, $dryrun) = @_;

    my $cfg = PVE::Storage::config();

    print "rescan volumes...\n";
    my $volid_hash = scan_volids($cfg, $vmid);

    # updates the config of a single VM from the scan result
    my $update_vm = sub {
        my ($id) = @_;

        my $conf = PVE::QemuConfig->load_config($id);

        PVE::QemuConfig->check_lock($conf);

        # only consider the volumes owned by this VM
        my $vm_volids = {};
        for my $volid (keys %$volid_hash) {
            my $info = $volid_hash->{$volid};
            next unless $info->{vmid} && $info->{vmid} == $id;
            $vm_volids->{$volid} = $info;
        }

        my $changes = update_disk_config($id, $conf, $vm_volids);

        PVE::QemuConfig->write_config($id, $conf) if $changes && !$dryrun;
    };

    # runs the update for one VM, under the config lock unless $nolock is set
    my $run_update = sub {
        my ($id) = @_;
        return $nolock
            ? $update_vm->($id)
            : PVE::QemuConfig->lock_config($id, $update_vm, $id);
    };

    if (defined($vmid)) {
        $run_update->($vmid);
    } else {
        my $vmlist = config_list();
        for my $id (keys %$vmlist) {
            $run_update->($id);
        }
    }
}
6987
# Restore a VM from a snapshot stored on a Proxmox Backup Server storage.
#
# Parameters:
#   $archive - volume ID of the backup; must parse as vtype 'backup' with
#              format 'pbs-vm'
#   $vmid    - ID of the VM to restore to
#   $user    - user handed to the backup hint parser and the permission check
#   $options - hash ref; the keys read here are 'live', 'unique', 'pool' and
#              'override_conf' (the hash is also passed to the hint parser)
#
# The index and the VM config are fetched into a temporary directory, the
# target volumes are allocated and filled via `pbs-restore`, then the new
# config is written. With $options->{live} only 'efidisk0' and 'tpmstate0'
# are restored up front; the remaining drives are handed to pbs_live_restore().
#
# Dies on failure; volumes allocated up to that point are destroyed first.
sub restore_proxmox_backup_archive {
    my ($archive, $vmid, $user, $options) = @_;

    my $storecfg = PVE::Storage::config();

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($archive);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

    my $fingerprint = $scfg->{fingerprint};
    my $keyfile = PVE::Storage::PBSPlugin::pbs_encryption_key_file_name($storecfg, $storeid);

    my $repo = PVE::PBSClient::get_repository($scfg);
    my $namespace = $scfg->{namespace};

    # This is only used for `pbs-restore` and the QEMU PBS driver (live-restore)
    my $password = PVE::Storage::PBSPlugin::pbs_get_password($scfg, $storeid);
    local $ENV{PBS_PASSWORD} = $password;
    local $ENV{PBS_FINGERPRINT} = $fingerprint if defined($fingerprint);

    my ($vtype, $pbs_backup_name, undef, undef, undef, undef, $format) =
        PVE::Storage::parse_volname($storecfg, $archive);

    die "got unexpected vtype '$vtype'\n" if $vtype ne 'backup';

    die "got unexpected backup format '$format'\n" if $format ne 'pbs-vm';

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree($tmpdir);
    mkpath($tmpdir);

    my $conffile = PVE::QemuConfig->config_file($vmid);
    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    # Note: $oldconf is undef if VM does not exists
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my $rpcenv = PVE::RPCEnvironment::get();
    my $devinfo = {}; # info about drives included in backup
    my $virtdev_hash = {}; # info about allocated drives

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        my $cfgfn = "$tmpdir/qemu-server.conf";
        my $firewall_config_fn = "$tmpdir/fw.conf";
        my $index_fn = "$tmpdir/index.json";

        my $cmd = "restore";

        my $param = [$pbs_backup_name, "index.json", $index_fn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);
        my $index = PVE::Tools::file_get_contents($index_fn);
        $index = decode_json($index);

        foreach my $info (@{$index->{files}}) {
            # the dots are escaped so that only a literal '.img.fidx' suffix
            # is accepted (an unescaped '.' would match any character)
            if ($info->{filename} =~ m/^(drive-\S+)\.img\.fidx$/) {
                my $devname = $1;
                if ($info->{size} =~ m/^(\d+)$/) { # untaint size
                    $devinfo->{$devname}->{size} = $1;
                } else {
                    die "unable to parse file size in 'index.json' - got '$info->{size}'\n";
                }
            }
        }

        my $is_qemu_server_backup = scalar(
            grep { $_->{filename} eq 'qemu-server.conf.blob' } @{$index->{files}}
        );
        if (!$is_qemu_server_backup) {
            die "backup does not look like a qemu-server backup (missing 'qemu-server.conf' file)\n";
        }
        my $has_firewall_config = scalar(grep { $_->{filename} eq 'fw.conf.blob' } @{$index->{files}});

        $param = [$pbs_backup_name, "qemu-server.conf", $cfgfn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

        if ($has_firewall_config) {
            $param = [$pbs_backup_name, "fw.conf", $firewall_config_fn];
            PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($firewall_config_fn, "${pve_firewall_dir}/$vmid.fw");
        }

        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $storecfg, $fh, $devinfo, $options);

        # fixme: rate limit?

        # create empty/temp config
        PVE::Tools::file_set_contents($conffile, "memory: 128\nlock: create");

        $restore_cleanup_oldconf->($storecfg, $vmid, $oldconf, $virtdev_hash) if $oldconf;

        # allocate volumes
        my $map = $restore_allocate_devices->($storecfg, $virtdev_hash, $vmid);

        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            # this fails if storage is unavailable
            my $volid = $d->{volid};
            my $path = PVE::Storage::path($storecfg, $volid);

            # for live-restore we only want to preload the efidisk and TPM state
            next if $options->{live} && $virtdev ne 'efidisk0' && $virtdev ne 'tpmstate0';

            my @ns_arg;
            if (defined(my $ns = $scfg->{namespace})) {
                @ns_arg = ('--ns', $ns);
            }

            my $pbs_restore_cmd = [
                '/usr/bin/pbs-restore',
                '--repository', $repo,
                @ns_arg,
                $pbs_backup_name,
                "$d->{devname}.img.fidx",
                $path,
                '--verbose',
            ];

            push @$pbs_restore_cmd, '--format', $d->{format} if $d->{format};
            push @$pbs_restore_cmd, '--keyfile', $keyfile if -e $keyfile;

            if (PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $volid)) {
                push @$pbs_restore_cmd, '--skip-zero';
            }

            my $dbg_cmdstring = PVE::Tools::cmd2string($pbs_restore_cmd);
            print "restore proxmox backup image: $dbg_cmdstring\n";
            run_command($pbs_restore_cmd);
        }

        # second pass over the backed-up config: build the new config text
        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $options->{unique},
            );
        }

        $fh->close();
    };
    my $err = $@;

    # for a successful live-restore the volumes stay active for the VM start
    if ($err || !$options->{live}) {
        $restore_deactivate_volumes->($storecfg, $virtdev_hash);
    }

    rmtree($tmpdir);

    if ($err) {
        $restore_destroy_volumes->($storecfg, $virtdev_hash);
        die $err;
    }

    if ($options->{live}) {
        # keep lock during live-restore
        $new_conf_raw .= "\nlock: create";
    }

    my $new_conf = restore_merge_config($conffile, $new_conf_raw, $options->{override_conf});
    check_restore_permissions($rpcenv, $user, $new_conf);
    PVE::QemuConfig->write_config($vmid, $new_conf);

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $options->{pool}) if $options->{pool};

    if ($options->{live}) {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "got signal ($!) - abort\n"; };

        my $conf = PVE::QemuConfig->load_config($vmid);
        die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf);

        # these special drives are already restored before start
        delete $devinfo->{'drive-efidisk0'};
        delete $devinfo->{'drive-tpmstate0-backup'};

        my $pbs_opts = {
            repo => $repo,
            keyfile => $keyfile,
            snapshot => $pbs_backup_name,
            namespace => $namespace,
        };
        pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $pbs_opts);

        PVE::QemuConfig->remove_lock($vmid, "create");
    }
}
7204
# Start a VM whose disks are backed by a Proxmox Backup Server snapshot and
# stream the data into the target volumes while the VM is running.
#
# Parameters:
#   $vmid           - ID of the VM
#   $conf           - VM config hash
#   $storecfg       - storage configuration
#   $restored_disks - hash ref keyed by device name ('drive-<config key>')
#   $opts           - hash ref with 'repo', 'snapshot' and optionally
#                     'keyfile' and 'namespace'
#
# On any error the VM is stopped and the function dies with
# "live-restore failed".
sub pbs_live_restore {
    my ($vmid, $conf, $storecfg, $restored_disks, $opts) = @_;

    print "starting VM for live-restore\n";
    print "repository: '$opts->{repo}', snapshot: '$opts->{snapshot}'\n";

    # the key file is optional; avoid a file test on an undefined value
    my $keyfile = $opts->{keyfile};
    my $use_keyfile = defined($keyfile) && -e $keyfile;

    my $live_restore_backing = {};
    for my $ds (keys %$restored_disks) {
        # never continue with a stale capture from an earlier match
        my ($confname) = $ds =~ m/^drive-(.*)$/
            or die "unexpected device name '$ds' for live-restore\n";

        my $pbs_conf = {
            repository => $opts->{repo},
            snapshot => $opts->{snapshot},
            archive => "$ds.img.fidx",
        };
        $pbs_conf->{keyfile} = $keyfile if $use_keyfile;
        $pbs_conf->{namespace} = $opts->{namespace} if defined($opts->{namespace});

        my $drive = parse_drive($confname, $conf->{$confname});
        print "restoring '$ds' to '$drive->{file}'\n";

        my $pbs_name = "drive-${confname}-pbs";
        $live_restore_backing->{$confname} = {
            name => $pbs_name,
            blockdev => print_pbs_blockdev($pbs_conf, $pbs_name),
        };
    }

    eval {
        # make sure HA doesn't interrupt our restore by stopping the VM
        if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
            run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
        }

        # start VM with backing chain pointing to PBS backup, environment vars for PBS driver
        # in QEMU (PBS_PASSWORD and PBS_FINGERPRINT) are already set by our caller
        vm_start_nolock(
            $storecfg,
            $vmid,
            $conf,
            { paused => 1, 'live-restore-backing' => $live_restore_backing },
            {},
        );

        my $qmeventd_fd = register_qmeventd_handle($vmid);

        # begin streaming, i.e. data copy from PBS to target disk for every vol,
        # this will effectively collapse the backing image chain consisting of
        # [target <- alloc-track -> PBS snapshot] to just [target] (alloc-track
        # removes itself once all backing images vanish with 'auto-remove=on')
        my $jobs = {};
        for my $ds (sort keys %$restored_disks) {
            my $job_id = "restore-$ds";
            mon_cmd($vmid, 'block-stream',
                'job-id' => $job_id,
                device => "$ds",
            );
            $jobs->{$job_id} = {};
        }

        mon_cmd($vmid, 'cont');
        qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');

        print "restore-drive jobs finished successfully, removing all tracking block devices"
            ." to disconnect from Proxmox Backup Server\n";

        for my $ds (sort keys %$restored_disks) {
            mon_cmd($vmid, 'blockdev-del', 'node-name' => "$ds-pbs");
        }

        close($qmeventd_fd);
    };

    my $err = $@;

    if ($err) {
        warn "An error occurred during live-restore: $err\n";
        _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
        die "live-restore failed\n";
    }
}
7282
# Inspired by pbs live-restore, this restores with the disks being available
# as files. Theoretically this can also be used to quick-start a full-clone vm
# if the disks are all available as files.
#
# The mapping provides format and path per config entry, such as
#   {
#       scsi0 => { format => <raw|qcow2|vmdk>, path => "/path/to/file" },
#       sata1 => ...,
#   }
#
# This is used when doing a `create` call with the `--live-import` parameter,
# where the disks get an `import-from=` property. The non-live part is
# therefore already handled in the `$create_disks()` call happening in the
# `create` api call.
#
# Dies if the mapping is invalid. If starting or streaming fails, the VM is
# stopped and the function dies with "live-restore failed". On success the
# 'import' lock is removed from the VM config.
sub live_import_from_files {
    my ($mapping, $vmid, $conf, $restore_options) = @_;

    my $live_restore_backing = {};
    for my $dev (keys %$mapping) {
        if (!is_valid_drivename($dev) || $dev =~ /^(?:efidisk|tpmstate)/) {
            die "disk not support for live-restoring: '$dev'\n";
        }

        if (!exists($conf->{$dev})) {
            die "mapping contains disk '$dev' which does not exist in the config\n";
        }

        my $info = $mapping->{$dev};
        my ($format, $path) = @{$info}{qw(format path)};
        die "missing path for '$dev' mapping\n" if !$path;
        die "missing format for '$dev' mapping\n" if !$format;
        die "invalid format '$format' for '$dev' mapping\n"
            if !grep { $format eq $_ } qw(raw qcow2 vmdk);

        # read-only block node on top of the source file, used as backing image
        my $node_name = "drive-$dev-restore";
        my $blockdev = join(',',
            "driver=$format",
            "node-name=$node_name",
            "read-only=on",
            "file.driver=file",
            "file.filename=$path",
        );

        $live_restore_backing->{$dev} = {
            name => $node_name,
            blockdev => $blockdev,
        };
    }

    my $storecfg = PVE::Storage::config();
    eval {
        # make sure HA doesn't interrupt our restore by stopping the VM
        if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
            run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
        }

        vm_start_nolock(
            $storecfg,
            $vmid,
            $conf,
            { paused => 1, 'live-restore-backing' => $live_restore_backing },
            {},
        );

        # prevent shutdowns from qmeventd when the VM powers off from the inside
        my $qmeventd_fd = register_qmeventd_handle($vmid);

        # begin streaming, i.e. data copy from the source file to the target
        # disk for every mapped drive; one 'block-stream' job per drive
        my $jobs = {};
        for my $ds (sort keys %$live_restore_backing) {
            my $job_id = "restore-$ds";
            mon_cmd($vmid, 'block-stream',
                'job-id' => $job_id,
                device => "drive-$ds",
            );
            $jobs->{$job_id} = {};
        }

        mon_cmd($vmid, 'cont');
        qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');

        print "restore-drive jobs finished successfully, removing all tracking block devices\n";

        for my $ds (sort keys %$live_restore_backing) {
            mon_cmd($vmid, 'blockdev-del', 'node-name' => "drive-$ds-restore");
        }

        close($qmeventd_fd);
    };

    my $err = $@;

    if ($err) {
        warn "An error occurred during live-restore: $err\n";
        _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
        die "live-restore failed\n";
    }

    PVE::QemuConfig->remove_lock($vmid, "import");
}
7369
# Restore a VM from a VMA archive by feeding it (optionally through a rate
# limiter and a decompressor) into `vma extract`.
#
# Parameters:
#   $archive - path of the archive, or '-' to read it from STDIN
#   $vmid    - ID of the VM to restore to
#   $user    - user handed to the backup hint parser and the permission check
#   $opts    - hash ref; the keys read here are 'bwlimit', 'unique',
#              'override_conf' and 'pool' (the hash is also passed to the
#              backup hint parser)
#   $comp    - compression handed to PVE::Storage::decompressor_info(); if
#              false, no decompressor is added to the pipeline
#
# Dies if the restore fails; volumes allocated up to that point are destroyed
# before the error is re-thrown.
7370 sub restore_vma_archive {
7371 my ($archive, $vmid, $user, $opts, $comp) = @_;
7372
7373 my $readfrom = $archive;
7374
7375 my $cfg = PVE::Storage::config();
7376 my $commands = [];
7377 my $bwlimit = $opts->{bwlimit};
7378
7379 my $dbg_cmdstring = '';
# Appends one command to the pipeline (and to the debug string). Every
# command added afterwards reads from '-' instead of the archive itself.
7380 my $add_pipe = sub {
7381 my ($cmd) = @_;
7382 push @$commands, $cmd;
7383 $dbg_cmdstring .= ' | ' if length($dbg_cmdstring);
7384 $dbg_cmdstring .= PVE::Tools::cmd2string($cmd);
7385 $readfrom = '-';
7386 };
7387
7388 my $input = undef;
7389 if ($archive eq '-') {
7390 $input = '<&STDIN';
7391 } else {
7392 # If we use a backup from a PVE defined storage we also consider that
7393 # storage's rate limit:
7394 my (undef, $volid) = PVE::Storage::path_to_volume_id($cfg, $archive);
7395 if (defined($volid)) {
7396 my ($sid, undef) = PVE::Storage::parse_volume_id($volid);
7397 my $readlimit = PVE::Storage::get_bandwidth_limit('restore', [$sid], $bwlimit);
7398 if ($readlimit) {
7399 print STDERR "applying read rate limit: $readlimit\n";
7400 my $cstream = ['cstream', '-t', $readlimit*1024, '--', $readfrom];
7401 $add_pipe->($cstream);
7402 }
7403 }
7404 }
7405
7406 if ($comp) {
7407 my $info = PVE::Storage::decompressor_info('vma', $comp);
7408 my $cmd = $info->{decompressor};
7409 push @$cmd, $readfrom;
7410 $add_pipe->($cmd);
7411 }
7412
7413 my $tmpdir = "/var/tmp/vzdumptmp$$";
7414 rmtree $tmpdir;
7415
7416 # disable interrupts (always do cleanups)
7417 local $SIG{INT} =
7418 local $SIG{TERM} =
7419 local $SIG{QUIT} =
7420 local $SIG{HUP} = sub { warn "got interrupt - ignored\n"; };
7421
# This fifo is given to `vma extract` via '-r'; the device mapping is written
# to it by $print_devmap below.
7422 my $mapfifo = "/var/tmp/vzdumptmp$$.fifo";
7423 POSIX::mkfifo($mapfifo, 0600);
7424 my $fifofh;
# passed to run_command() as 'afterfork' callback further down
7425 my $openfifo = sub { open($fifofh, '>', $mapfifo) or die $! };
7426
7427 $add_pipe->(['vma', 'extract', '-v', '-r', $mapfifo, $readfrom, $tmpdir]);
7428
7429 my $devinfo = {}; # info about drives included in backup
7430 my $virtdev_hash = {}; # info about allocated drives
7431
7432 my $rpcenv = PVE::RPCEnvironment::get();
7433
7434 my $conffile = PVE::QemuConfig->config_file($vmid);
7435
7436 # Note: $oldconf is undef if VM does not exist
7437 my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
7438 my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
7439 my $new_conf_raw = '';
7440
7441 my %storage_limits;
7442
# Called by the output parser once a 'CTIME: ' line was seen. It copies an
# extracted firewall config (if present), allocates the target volumes, writes
# one mapping line per restored drive to the fifo and builds the new config
# text in $new_conf_raw.
7443 my $print_devmap = sub {
7444 my $cfgfn = "$tmpdir/qemu-server.conf";
7445
7446 # we can read the config - that is already extracted
7447 my $fh = IO::File->new($cfgfn, "r") ||
7448 die "unable to read qemu-server.conf - $!\n";
7449
7450 my $fwcfgfn = "$tmpdir/qemu-server.fw";
7451 if (-f $fwcfgfn) {
7452 my $pve_firewall_dir = '/etc/pve/firewall';
7453 mkdir $pve_firewall_dir; # make sure the dir exists
7454 PVE::Tools::file_copy($fwcfgfn, "${pve_firewall_dir}/$vmid.fw");
7455 }
7456
7457 $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $cfg, $fh, $devinfo, $opts);
7458
# look up the restore bandwidth limit once per target storage; the value is
# multiplied by 1024 before it is stored
7459 foreach my $info (values %{$virtdev_hash}) {
7460 my $storeid = $info->{storeid};
7461 next if defined($storage_limits{$storeid});
7462
7463 my $limit = PVE::Storage::get_bandwidth_limit('restore', [$storeid], $bwlimit) // 0;
7464 print STDERR "rate limit for storage $storeid: $limit KiB/s\n" if $limit;
7465 $storage_limits{$storeid} = $limit * 1024;
7466 }
7467
7468 foreach my $devname (keys %$devinfo) {
7469 die "found no device mapping information for device '$devname'\n"
7470 if !$devinfo->{$devname}->{virtdev};
7471 }
7472
7473 # create empty/temp config
7474 if ($oldconf) {
7475 PVE::Tools::file_set_contents($conffile, "memory: 128\n");
7476 $restore_cleanup_oldconf->($cfg, $vmid, $oldconf, $virtdev_hash);
7477 }
7478
7479 # allocate volumes
7480 my $map = $restore_allocate_devices->($cfg, $virtdev_hash, $vmid);
7481
7482 # print restore information to $fifofh
7483 foreach my $virtdev (sort keys %$virtdev_hash) {
7484 my $d = $virtdev_hash->{$virtdev};
7485 next if $d->{is_cloudinit}; # no need to restore cloudinit
7486
7487 my $storeid = $d->{storeid};
7488 my $volid = $d->{volid};
7489
7490 my $map_opts = '';
7491 if (my $limit = $storage_limits{$storeid}) {
7492 $map_opts .= "throttling.bps=$limit:throttling.group=$storeid:";
7493 }
7494
# zeros are written unless the volume reports the 'sparseinit' feature
7495 my $write_zeros = 1;
7496 if (PVE::Storage::volume_has_feature($cfg, 'sparseinit', $volid)) {
7497 $write_zeros = 0;
7498 }
7499
7500 my $path = PVE::Storage::path($cfg, $volid);
7501
7502 print $fifofh "${map_opts}format=$d->{format}:${write_zeros}:$d->{devname}=$path\n";
7503
7504 print "map '$d->{devname}' to '$path' (write zeros = ${write_zeros})\n";
7505 }
7506
# second pass over the extracted config: build the new config text
7507 $fh->seek(0, 0) || die "seek failed - $!\n";
7508
7509 my $cookie = { netcount => 0 };
7510 while (defined(my $line = <$fh>)) {
7511 $new_conf_raw .= restore_update_config_line(
7512 $cookie,
7513 $map,
7514 $line,
7515 $opts->{unique},
7516 );
7517 }
7518
7519 $fh->close();
7520 };
7521
7522 my $oldtimeout;
7523
7524 eval {
7525 # enable interrupts
7526 local $SIG{INT} =
7527 local $SIG{TERM} =
7528 local $SIG{QUIT} =
7529 local $SIG{HUP} =
7530 local $SIG{PIPE} = sub { die "interrupted by signal\n"; };
7531 local $SIG{ALRM} = sub { die "got timeout\n"; };
7532
7533 $oldtimeout = alarm(5); # for reading the VMA header - might hang with a corrupted one
7534
# Handles one line of pipeline output: 'DEV:' lines fill $devinfo, the
# 'CTIME:' line triggers the device mapping.
7535 my $parser = sub {
7536 my $line = shift;
7537
7538 print "$line\n";
7539
7540 if ($line =~ m/^DEV:\sdev_id=(\d+)\ssize:\s(\d+)\sdevname:\s(\S+)$/) {
7541 my ($dev_id, $size, $devname) = ($1, $2, $3);
7542 $devinfo->{$devname} = { size => $size, dev_id => $dev_id };
7543 } elsif ($line =~ m/^CTIME: /) {
7544 # we correctly received the vma config, so we can disable
7545 # the timeout now for disk allocation
7546 alarm($oldtimeout || 0);
7547 $oldtimeout = undef;
7548 &$print_devmap();
7549 print $fifofh "done\n";
7550 close($fifofh);
7551 $fifofh = undef;
7552 }
7553 };
7554
7555 print "restore vma archive: $dbg_cmdstring\n";
7556 run_command($commands, input => $input, outfunc => $parser, afterfork => $openfifo);
7557 };
7558 my $err = $@;
7559
# re-arm the previously pending alarm, unless the parser already did that
# (it resets $oldtimeout to undef)
7560 alarm($oldtimeout) if $oldtimeout;
7561
7562 $restore_deactivate_volumes->($cfg, $virtdev_hash);
7563
7564 close($fifofh) if $fifofh;
7565 unlink $mapfifo;
7566 rmtree $tmpdir;
7567
7568 if ($err) {
7569 $restore_destroy_volumes->($cfg, $virtdev_hash);
7570 die $err;
7571 }
7572
7573 my $new_conf = restore_merge_config($conffile, $new_conf_raw, $opts->{override_conf});
7574 check_restore_permissions($rpcenv, $user, $new_conf);
7575 PVE::QemuConfig->write_config($vmid, $new_conf);
7576
# a failing rescan only produces a warning
7577 eval { rescan($vmid, 1); };
7578 warn $@ if $@;
7579
7580 PVE::AccessControl::add_vm_to_pool($vmid, $opts->{pool}) if $opts->{pool};
7581 }
7582
# Restore a VM from an old tar based vzdump archive.
#
# The archive is unpacked with tar, which hands every member to the
# `qmextract` helper via '--to-command'. The config found in the temporary
# directory is then rewritten line by line and written to the VM config file.
#
# Parameters:
#   $archive - path of the archive, or '-' to read it from STDIN
#   $vmid    - ID of the VM to restore to
#   $user    - exported to the helper as VZDUMP_USER
#   $opts    - hash ref; the keys read here are 'override_conf' (must be
#              empty), 'storage', 'pool', 'prealloc', 'info' and 'unique'
#
# Dies on failure, after cleaning up via tar_restore_cleanup() (skipped
# when 'info' is set).
sub restore_tar_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    my @override_keys = keys %{$opts->{override_conf}};
    if (scalar(@override_keys) > 0) {
        my $keystring = join(' ', @override_keys);
        die "cannot pass along options ($keystring) when restoring from tar archive\n";
    }

    if ($archive ne '-') {
        my $firstfile = tar_archive_read_firstfile($archive);
        if ($firstfile ne 'qemu-server.conf') {
            die "ERROR: file '$archive' does not look like a QemuServer vzdump backup\n";
        }
    }

    my $storecfg = PVE::Storage::config();

    # avoid zombie disks when restoring over an existing VM -> cleanup first
    # pass keep_empty_config=1 to keep the config (thus VMID) reserved for us
    # skiplock=1 because qmrestore has set the 'create' lock itself already
    my $vmcfgfn = PVE::QemuConfig->config_file($vmid);
    destroy_vm($storecfg, $vmid, 1, { lock => 'restore' }) if -f $vmcfgfn;

    # command line of the helper tar runs for every archive member
    my @tocmd_parts = ("/usr/lib/qemu-server/qmextract");
    push @tocmd_parts, "--storage " . PVE::Tools::shellquote($opts->{storage}) if $opts->{storage};
    push @tocmd_parts, "--pool " . PVE::Tools::shellquote($opts->{pool}) if $opts->{pool};
    push @tocmd_parts, '--prealloc' if $opts->{prealloc};
    push @tocmd_parts, '--info' if $opts->{info};
    my $tocmd = join(' ', @tocmd_parts);

    # tar option "xf" does not autodetect compression when read from STDIN,
    # so we pipe to zcat
    my $cmd = "zcat -f|tar xf " . PVE::Tools::shellquote($archive) . " " .
        PVE::Tools::shellquote("--to-command=$tocmd");

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    mkpath($tmpdir);

    local $ENV{VZDUMP_TMPDIR} = $tmpdir;
    local $ENV{VZDUMP_VMID} = $vmid;
    local $ENV{VZDUMP_USER} = $user;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    my $new_conf_raw = '';

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        if ($archive eq '-') {
            print "extracting archive from STDIN\n";
            run_command($cmd, input => "<&STDIN");
        } else {
            print "extracting archive '$archive'\n";
            run_command($cmd);
        }

        # NOTE(review): this only leaves the eval block; the code following
        # the eval (including the config write) still runs - confirm that
        # this is intended for the 'info' mode.
        return if $opts->{info};

        # read new mapping
        my $map = {};
        my $statfile = "$tmpdir/qmrestore.stat";
        if (my $statfh = IO::File->new($statfile, "r")) {
            while (defined(my $statline = <$statfh>)) {
                if ($statline =~ m/vzdump:([^\s:]*):(\S+)$/) {
                    $map->{$1} = $2 if $1;
                } else {
                    print STDERR "unable to parse line in statfile - $statline\n";
                }
            }
            $statfh->close();
        }

        my $confsrc = "$tmpdir/qemu-server.conf";

        my $srcfd = IO::File->new($confsrc, "r") || die "unable to open file '$confsrc'\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$srcfd>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $srcfd->close();
    };
    if (my $err = $@) {
        tar_restore_cleanup($storecfg, "$tmpdir/qmrestore.stat") if !$opts->{info};
        die $err;
    }

    rmtree($tmpdir);

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;
}
7695
# Call $func once for every storage that holds a (non-CD-ROM) volume of the
# given VM config. The storage IDs are visited in sorted order and each one
# is passed as the only argument.
sub foreach_storage_used_by_vm {
    my ($conf, $func) = @_;

    my %used_storage;

    my $collect_storage = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive);

        # second argument: do not die on volumes that are no valid volume IDs
        my ($sid, $volname) = PVE::Storage::parse_volume_id($drive->{file}, 1);
        $used_storage{$sid} = $sid if $sid;
    };
    PVE::QemuConfig->foreach_volume($conf, $collect_storage);

    for my $sid (sort keys %used_storage) {
        $func->($sid);
    }
}
7715
# storage types whose volumes are snapshotted through QEMU
my $qemu_snap_storage = {
    rbd => 1,
};

# Decide whether snapshots of the given volume are taken by QEMU itself.
#
# Parameters:
#   $storecfg - storage configuration (its 'ids' hash is consulted)
#   $volid    - volume ID
#   $deviceid - device ID; anything matching 'tpmstate0' is never handled
#               by QEMU
#
# Returns 1 if the storage type is listed in $qemu_snap_storage and 'krbd'
# is not set, or if the volume ID ends in '.qcow2' or '.qed'; returns
# nothing otherwise. Dies if the storage is not configured.
sub do_snapshots_with_qemu {
    my ($storecfg, $volid, $deviceid) = @_;

    return if $deviceid =~ m/tpmstate0/;

    my $storage_name = PVE::Storage::parse_volume_id($volid);
    my $scfg = $storecfg->{ids}->{$storage_name};
    die "could not find storage '$storage_name'\n" if !defined($scfg);

    my $by_storage_type = $qemu_snap_storage->{$scfg->{type}} && !$scfg->{krbd};
    return 1 if $by_storage_type;

    return 1 if $volid =~ m/\.(qcow2|qed)$/;

    return;
}
7738
# Check whether the QEMU guest agent answers a 'guest-ping' (timeout 3).
#
# Returns 1 if the ping succeeded, 0 otherwise. On failure a warning is
# emitted unless $nowarn is true.
sub qga_check_running {
    my ($vmid, $nowarn) = @_;

    eval { mon_cmd($vmid, "guest-ping", timeout => 3); };
    my $err = $@;
    return 1 if !$err;

    warn "QEMU Guest Agent is not running - $err" if !$nowarn;
    return 0;
}
7749
# Convert the disks of a VM into base volumes.
#
# Parameters:
#   $vmid - ID of the VM
#   $conf - VM config hash; updated in place and written after each
#           converted disk
#   $disk - optional config key; if set, only this disk is converted
#
# CD-ROM drives and volumes without the 'template' storage feature are
# skipped.
sub template_create {
    my ($vmid, $conf, $disk) = @_;

    my $storecfg = PVE::Storage::config();

    my $convert_to_base = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);
        return if $disk && $ds ne $disk;

        my $volid = $drive->{file};
        return if !PVE::Storage::volume_has_feature($storecfg, 'template', $volid);

        $drive->{file} = PVE::Storage::vdisk_create_base($storecfg, $volid);
        $conf->{$ds} = print_drive($drive);
        PVE::QemuConfig->write_config($vmid, $conf);
    };

    PVE::QemuConfig->foreach_volume($conf, $convert_to_base);
}
7770
# Convert an 'iscsi://<portal>/<target>/<lun>' path into the option string
# form (file.driver=iscsi,...) used with qemu-img's image options.
#
# The initiator name is taken from get_initiator_name().
#
# Dies if $path does not have the expected form.
sub convert_iscsi_path {
    my ($path) = @_;

    if ($path =~ m|^iscsi://([^/]+)/([^/]+)/(.+)$|) {
        my ($portal, $target, $lun) = ($1, $2, $3);

        my $initiator_name = get_initiator_name();

        return "file.driver=iscsi,file.transport=tcp,file.initiator-name=$initiator_name,".
            "file.portal=$portal,file.target=$target,file.lun=$lun,driver=raw";
    }

    # spelling of the error message fixed (was "unkown format")
    die "cannot convert iscsi path '$path', unknown format\n";
}
7787
# Copy the contents of a source volume (or plain file/block device path) to an existing
# destination volume by running 'qemu-img convert -p -n'.
#
# $src_volid           - source volume ID, or a path to a regular file or block device
# $dst_volid           - destination volume ID (must be a valid volume ID)
# $size                - only used to render the progress output
# $snapname            - optional snapshot to read from
# $is_zero_initialized - if set (and the target is not iSCSI), write via the 'zeroinit:' prefix
# $bwlimit             - optional rate limit, passed to qemu-img as "-r <bwlimit>K"
#
# Dies with "copy failed: ..." if the qemu-img command fails.
sub qemu_img_convert {
    my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized, $bwlimit) = @_;

    my $storecfg = PVE::Storage::config();
    my ($src_storeid, $src_volname) = PVE::Storage::parse_volume_id($src_volid, 1);
    my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid, 1);

    die "destination '$dst_volid' is not a valid volid form qemu-img convert\n" if !$dst_storeid;

    my ($cachemode, $src_path, $src_format);
    my $src_is_iscsi = 0;

    if ($src_storeid) {
        # source is a storage-managed volume
        PVE::Storage::activate_volumes($storecfg, [$src_volid], $snapname);
        my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
        $src_format = qemu_img_format($src_scfg, $src_volname);
        $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
        $src_is_iscsi = ($src_path =~ m|^iscsi://|);
        $cachemode = 'none' if $src_scfg->{type} eq 'zfspool';
    } elsif (-f $src_volid || -b $src_volid) {
        # source is a plain path, derive the format from its extension if possible
        $src_path = $src_volid;
        $src_format = $1 if $src_path =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/;
    }

    die "source '$src_volid' is not a valid volid nor path for qemu-img convert\n" if !$src_path;

    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
    my $dst_format = qemu_img_format($dst_scfg, $dst_volname);
    my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
    my $dst_is_iscsi = ($dst_path =~ m|^iscsi://|);

    my @cmd = ('/usr/bin/qemu-img', 'convert', '-p', '-n');
    push @cmd, '-l', "snapshot.name=$snapname"
        if $snapname && $src_format && $src_format eq "qcow2";
    push @cmd, '-t', 'none' if $dst_scfg->{type} eq 'zfspool';
    push @cmd, '-T', $cachemode if defined($cachemode);
    push @cmd, '-r', "${bwlimit}K" if defined($bwlimit);

    # iSCSI paths are handed over as option strings instead of a path plus format
    if ($src_is_iscsi) {
        push @cmd, '--image-opts';
        $src_path = convert_iscsi_path($src_path);
    } elsif ($src_format) {
        push @cmd, '-f', $src_format;
    }

    if ($dst_is_iscsi) {
        push @cmd, '--target-image-opts';
        $dst_path = convert_iscsi_path($dst_path);
    } else {
        push @cmd, '-O', $dst_format;
    }

    push @cmd, $src_path;
    push @cmd, (!$dst_is_iscsi && $is_zero_initialized) ? "zeroinit:$dst_path" : $dst_path;

    # turn the "(NN.NN/100%)" progress lines into a human readable transfer report
    my $report_progress = sub {
        my ($line) = @_;
        return if $line !~ m/\((\S+)\/100\%\)/;

        my $percent = $1;
        my $transferred = int($size * $percent / 100);
        my $total_h = render_bytes($size, 1);
        my $transferred_h = render_bytes($transferred, 1);

        print "transferred $transferred_h of $total_h ($percent%)\n";
    };

    eval { run_command(\@cmd, timeout => undef, outfunc => $report_progress); };
    if (my $err = $@) {
        die "copy failed: $err";
    }
}
7870
# Determine the image format name for a volume name on the given storage.
#
# For storages with a 'path' setting or of type 'esxi', the file extension of $volname is
# returned if it matches $PVE::QemuServer::Drive::QEMU_FORMAT_RE. In all other cases "raw" is
# returned.
sub qemu_img_format {
    my ($scfg, $volname) = @_;

    # FIXME: this entire function is kind of weird given that `parse_volname`
    # also already gives us a format?
    my $is_path_storage = $scfg->{path} || $scfg->{type} eq 'esxi';
    return "raw" if !$is_path_storage;

    return ($volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/) ? $1 : "raw";
}
7884
# Start a QMP 'drive-mirror' job for device "drive-$drive" of VM $vmid and hand over to
# qemu_drive_mirror_monitor().
#
# $dst_volid           - target volume ID, or an 'nbd:...' target which is passed on as is
# $vmiddst, $completion, $qga - forwarded to qemu_drive_mirror_monitor()
# $is_zero_initialized - if set, a storage target is addressed via the 'zeroinit:' prefix
# $jobs                - hash of block jobs keyed by device; an entry for this drive is added
# $bwlimit             - optional limit, printed as KB/s and passed as speed = $bwlimit * 1024
# $src_bitmap          - optional dirty bitmap name; switches the sync mode to 'incremental'
#
# If starting the job fails, all jobs in $jobs are cancelled and "mirroring error: ..." is
# thrown.
sub qemu_drive_mirror {
    my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit, $src_bitmap) = @_;

    $jobs = {} if !$jobs;
    $jobs->{"drive-$drive"} = {};

    my ($qemu_target, $format);
    if ($dst_volid =~ /^nbd:/) {
        ($qemu_target, $format) = ($dst_volid, "nbd");
    } else {
        my $storecfg = PVE::Storage::config();
        my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid);
        my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

        $format = qemu_img_format($dst_scfg, $dst_volname);

        my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
        $qemu_target = $is_zero_initialized ? "zeroinit:$dst_path" : $dst_path;
    }

    my $opts = {
        timeout => 10,
        device => "drive-$drive",
        mode => "existing",
        sync => "full",
        target => $qemu_target,
    };
    $opts->{format} = $format if $format;

    if (defined($src_bitmap)) {
        $opts->{sync} = 'incremental';
        $opts->{bitmap} = $src_bitmap;
        print "drive mirror re-using dirty bitmap '$src_bitmap'\n";
    }

    if (!defined($bwlimit)) {
        print "drive mirror is starting for drive-$drive\n";
    } else {
        $opts->{speed} = $bwlimit * 1024;
        print "drive mirror is starting for drive-$drive with bandwidth limit: ${bwlimit} KB/s\n";
    }

    # if a job already runs for this device we get an error, catch it for cleanup
    eval { mon_cmd($vmid, "drive-mirror", %$opts); };
    if (my $err = $@) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@;
        die "mirroring error: $err\n";
    }

    qemu_drive_mirror_monitor ($vmid, $vmiddst, $jobs, $completion, $qga);
}
7936
# Poll the block jobs of type $op (default "mirror") that are listed in the $jobs hash (keyed
# by device) once per second, print their progress and finish them according to $completion.
#
# $completion can be either
# 'complete': wait until all jobs are ready, block-job-complete them (default)
# 'cancel': wait until all jobs are ready, block-job-cancel them
# 'skip': wait until all jobs are ready, return with block jobs in ready state
# 'auto': wait until all jobs disappear, only use for jobs which complete automatically
#
# If $vmiddst is set and differs from $vmid, the ready jobs are cancelled instead of completed
# (the source keeps its disks). While doing so, the guest filesystem is frozen if $qga is set
# and the guest agent responds; otherwise the VM is suspended.
#
# Finished jobs are removed from $jobs. On any error, the remaining jobs are cancelled and
# "block job ($op) error: ..." is thrown. An error of the completion command other than
# "cannot be completed" (which is retried, up to 300 times in total) is treated as fatal.
sub qemu_drive_mirror_monitor {
    my ($vmid, $vmiddst, $jobs, $completion, $qga, $op) = @_;

    $completion //= 'complete';
    $op //= "mirror";

    eval {
        my $err_complete = 0;

        my $starttime = time ();
        while (1) {
            die "block job ('$op') timed out\n" if $err_complete > 300;

            my $stats = mon_cmd($vmid, "query-block-jobs");
            my $ctime = time();

            # only look at jobs of the requested type
            my $running_jobs = {};
            for my $stat (@$stats) {
                next if $stat->{type} ne $op;
                $running_jobs->{$stat->{device}} = $stat;
            }

            my $readycounter = 0;

            for my $job_id (sort keys %$jobs) {
                my $job = $running_jobs->{$job_id};

                my $vanished = !defined($job);
                my $complete = defined($jobs->{$job_id}->{complete}) && $vanished;
                if ($complete || ($vanished && $completion eq 'auto')) {
                    print "$job_id: $op-job finished\n";
                    delete $jobs->{$job_id};
                    next;
                }

                # a job that vanished without us completing it must have been cancelled
                die "$job_id: '$op' has been cancelled\n" if !defined($job);

                my $busy = $job->{busy};
                my $ready = $job->{ready};
                if (my $total = $job->{len}) {
                    my $transferred = $job->{offset} || 0;
                    my $remaining = $total - $transferred;
                    my $percent = sprintf "%.2f", ($transferred * 100 / $total);

                    my $duration = $ctime - $starttime;
                    my $total_h = render_bytes($total, 1);
                    my $transferred_h = render_bytes($transferred, 1);

                    my $status = sprintf(
                        "transferred $transferred_h of $total_h ($percent%%) in %s",
                        render_duration($duration),
                    );

                    if ($ready) {
                        if ($busy) {
                            $status .= ", still busy"; # shouldn't even happen? but mirror is weird
                        } else {
                            $status .= ", ready";
                        }
                    }
                    # stop repeating the status once the job was seen as ready
                    print "$job_id: $status\n" if !$jobs->{$job_id}->{ready};
                    $jobs->{$job_id}->{ready} = $ready;
                }

                $readycounter++ if $job->{ready};
            }

            last if scalar(keys %$jobs) == 0;

            if ($readycounter == scalar(keys %$jobs)) {
                print "all '$op' jobs are ready\n";

                # do the complete later (or has already been done)
                last if $completion eq 'skip' || $completion eq 'auto';

                if ($vmiddst && $vmiddst != $vmid) {
                    my $agent_running = $qga && qga_check_running($vmid);
                    if ($agent_running) {
                        print "freeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-freeze"); };
                        warn $@ if $@;
                    } else {
                        print "suspend vm\n";
                        eval { PVE::QemuServer::vm_suspend($vmid, 1); };
                        warn $@ if $@;
                    }

                    # if we clone a disk for a new target vm, we don't switch the disk
                    PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs);

                    if ($agent_running) {
                        print "unfreeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-thaw"); };
                        warn $@ if $@;
                    } else {
                        print "resume vm\n";
                        eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
                        warn $@ if $@;
                    }

                    last;
                } else {

                    for my $job_id (sort keys %$jobs) {
                        # try to switch the disk if source and destination are on the same guest
                        print "$job_id: Completing block job_id...\n";

                        # named differently from $op to not shadow the job type
                        my $completion_command;
                        if ($completion eq 'complete') {
                            $completion_command = 'block-job-complete';
                        } elsif ($completion eq 'cancel') {
                            $completion_command = 'block-job-cancel';
                        } else {
                            die "invalid completion value: $completion\n";
                        }

                        eval { mon_cmd($vmid, $completion_command, device => $job_id) };
                        my $cmd_err = $@;
                        if ($cmd_err && $cmd_err =~ m/cannot be completed/) {
                            print "$job_id: block job cannot be completed, trying again.\n";
                            $err_complete++;
                        } elsif ($cmd_err) {
                            # any other failure must not be reported as success
                            die "$job_id: block job cannot be completed - $cmd_err\n";
                        } else {
                            print "$job_id: Completed successfully.\n";
                            $jobs->{$job_id}->{complete} = 1;
                        }
                    }
                }
            }
            sleep 1;
        }
    };
    my $err = $@;

    if ($err) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@;
        die "block job ($op) error: $err";
    }
}
8078
# Send 'block-job-cancel' for every job in the $jobs hash (keyed by device) and then poll
# 'query-block-jobs' once per second until none of them is reported anymore.
#
# Errors of the individual cancel commands are ignored. Jobs that are gone are removed from
# $jobs, so the hash is empty when this returns.
sub qemu_blockjobs_cancel {
    my ($vmid, $jobs) = @_;

    for my $job_id (keys %$jobs) {
        print "$job_id: Cancelling block job\n";
        eval { mon_cmd($vmid, "block-job-cancel", device => $job_id); };
        $jobs->{$job_id}->{cancel} = 1;
    }

    while (1) {
        my $stats = mon_cmd($vmid, "query-block-jobs");

        my %still_running;
        for my $stat (@$stats) {
            $still_running{$stat->{device}} = $stat;
        }

        for my $job_id (keys %$jobs) {
            next if !defined($jobs->{$job_id}->{cancel});
            next if defined($still_running{$job_id});

            print "$job_id: Done.\n";
            delete $jobs->{$job_id};
        }

        last unless keys %$jobs;

        sleep 1;
    }
}
8109
# Check for bug #4525: drive-mirror will open the target drive with the same aio setting as the
# source, but some storages have problems with io_uring, sometimes even leading to crashes.
#
# Dies if the source drive uses io_uring (explicitly via its 'aio' setting, or by the default
# of its storage) while the target storage does not allow io_uring by default. Does nothing if
# drive-mirror is not used or the storage does not change.
my sub clone_disk_check_io_uring {
    my ($src_drive, $storecfg, $src_storeid, $dst_storeid, $use_drive_mirror) = @_;

    return if !$use_drive_mirror;

    # Don't complain when not changing storage.
    # Assume if it works for the source, it'll work for the target too.
    return if $src_storeid eq $dst_storeid;

    my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

    my $cache_direct = drive_uses_cache_direct($src_drive);

    # an explicit aio setting wins over the storage default
    my $src_uses_io_uring = $src_drive->{aio}
        ? $src_drive->{aio} eq 'io_uring'
        : storage_allows_io_uring_default($src_scfg, $cache_direct);

    return if !$src_uses_io_uring;
    return if storage_allows_io_uring_default($dst_scfg, $cache_direct);

    die "target storage is known to cause issues with aio=io_uring (used by current drive)\n";
}
8136
# Clone a single drive, either as linked clone or as full copy.
#
# $storecfg   - storage configuration
# $source     - hash with the keys vmid, running, drivename, drive (parsed drive), snapname
# $dest       - hash with the keys vmid, drivename, efisize, storage, format
# $full       - create a full copy instead of a linked clone
# $newvollist - array reference, every newly allocated volume ID is pushed onto it
# $jobs, $completion, $qga - block job state, passed on to the drive-mirror helpers
# $bwlimit    - optional bandwidth limit for the copy
#
# A full clone of a running VM's drive without snapshot is done with drive-mirror; otherwise
# the data is copied offline with qemu-img. Cloud-init drives are only allocated, no data is
# copied for them.
#
# Returns a deep copy of the source drive hash that points to the new volume (with 'format'
# removed and 'size' set if it could be determined).
sub clone_disk {
    my ($storecfg, $source, $dest, $full, $newvollist, $jobs, $completion, $qga, $bwlimit) = @_;

    my ($vmid, $running) = $source->@{qw(vmid running)};
    my ($src_drivename, $drive, $snapname) = $source->@{qw(drivename drive snapname)};

    my ($newvmid, $dst_drivename, $efisize) = $dest->@{qw(vmid drivename efisize)};
    my ($storage, $format) = $dest->@{qw(storage format)};

    my $use_drive_mirror = $full && $running && $src_drivename && !$snapname;

    if ($src_drivename && $dst_drivename && $src_drivename ne $dst_drivename) {
        die "cloning from/to EFI disk requires EFI disk\n"
            if $src_drivename eq 'efidisk0' || $dst_drivename eq 'efidisk0';
        die "cloning from/to TPM state requires TPM state\n"
            if $src_drivename eq 'tpmstate0' || $dst_drivename eq 'tpmstate0';

        # This would lead to two device nodes in QEMU pointing to the same backing image!
        die "cannot change drive name when cloning disk from/to the same VM\n"
            if $use_drive_mirror && $vmid == $newvmid;
    }

    die "cannot move TPM state while VM is running\n"
        if $use_drive_mirror && $src_drivename eq 'tpmstate0';

    # the destination drive name may be missing, avoid comparing an undefined value below
    my $dst_is_efidisk = defined($dst_drivename) && $dst_drivename eq 'efidisk0';
    my $dst_is_tpmstate = defined($dst_drivename) && $dst_drivename eq 'tpmstate0';

    my $newvolid;

    print "create " . ($full ? 'full' : 'linked') . " clone of drive ";
    print "$src_drivename " if $src_drivename;
    print "($drive->{file})\n";

    if (!$full) {
        $newvolid = PVE::Storage::vdisk_clone($storecfg, $drive->{file}, $newvmid, $snapname);
        push @$newvollist, $newvolid;
    } else {
        my ($src_storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        my $storeid = $storage || $src_storeid;

        my $dst_format = resolve_dst_disk_format($storecfg, $storeid, $volname, $format);

        my $is_cloudinit = drive_is_cloudinit($drive);

        my $name = undef;
        my $size = undef;
        if ($is_cloudinit) {
            $name = "vm-$newvmid-cloudinit";
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            if ($scfg->{path}) {
                $name .= ".$dst_format";
            }
            $snapname = undef;
            $size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE;
        } elsif ($dst_is_efidisk) {
            $size = $efisize or die "internal error - need to specify EFI disk size\n";
        } elsif ($dst_is_tpmstate) {
            $dst_format = 'raw';
            $size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
        } else {
            clone_disk_check_io_uring($drive, $storecfg, $src_storeid, $storeid, $use_drive_mirror);

            $size = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
        }
        $newvolid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $newvmid, $dst_format, $name, ($size/1024)
        );
        push @$newvollist, $newvolid;

        PVE::Storage::activate_volumes($storecfg, [$newvolid]);

        if ($is_cloudinit) {
            # No data is copied for cloud-init drives.
            # when cloning multiple disks (e.g. during clone_vm) it might be the last disk
            # if this is the case, we have to complete any block-jobs still there from
            # previous drive-mirrors
            if (defined($completion) && $completion eq 'complete' && scalar(keys %$jobs) > 0) {
                qemu_drive_mirror_monitor($vmid, $newvmid, $jobs, $completion, $qga);
            }
        } else {
            my $sparseinit = PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $newvolid);
            if ($use_drive_mirror) {
                qemu_drive_mirror($vmid, $src_drivename, $newvolid, $newvmid, $sparseinit, $jobs,
                    $completion, $qga, $bwlimit);
            } elsif ($dst_is_efidisk) {
                # the relevant data on the efidisk may be smaller than the source
                # e.g. on RBD/ZFS, so we use dd to copy only the amount
                # that is given by the OVMF_VARS.fd
                my $src_path = PVE::Storage::path($storecfg, $drive->{file}, $snapname);
                my $dst_path = PVE::Storage::path($storecfg, $newvolid);

                my $src_format = (PVE::Storage::parse_volname($storecfg, $drive->{file}))[6];

                # better for Ceph if block size is not too small, see bug #3324
                my $bs = 1024*1024;

                my $cmd = ['qemu-img', 'dd', '-n', '-O', $dst_format];

                if ($src_format eq 'qcow2' && $snapname) {
                    die "cannot clone qcow2 EFI disk snapshot - requires QEMU >= 6.2\n"
                        if !min_version(kvm_user_version(), 6, 2);
                    push $cmd->@*, '-l', $snapname;
                }
                push $cmd->@*, "bs=$bs", "osize=$size", "if=$src_path", "of=$dst_path";
                run_command($cmd);
            } else {
                qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit, $bwlimit);
            }
        }
    }

    # best effort, the size is simply left out if it cannot be queried
    my $size = eval { PVE::Storage::volume_size_info($storecfg, $newvolid, 10) };

    my $disk = dclone($drive);
    delete $disk->{format};
    $disk->{file} = $newvolid;
    $disk->{size} = $size if defined($size);

    return $disk;
}
8256
# Query the version of the QEMU process of VM $vmid via QMP 'query-version'.
#
# Returns the string "<major>.<minor>".
sub get_running_qemu_version {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, "query-version");
    my $version = $res->{qemu};

    return sprintf("%s.%s", $version->{major}, $version->{minor});
}
8262
# Decide whether the old (non-EFI) PXE bios files have to be used for a machine type.
#
# Returns an empty list if no machine type is given, otherwise the list
# ($use_old_bios_files, $machine_type). A '.pxe' suffix on the machine type forces the old
# files and is stripped from the returned machine type.
sub qemu_use_old_bios_files {
    my ($machine_type) = @_;

    return if !$machine_type;

    if ($machine_type =~ m/^(\S+)\.pxe$/) {
        my $base_type = $1;
        return (1, $base_type);
    }

    # Note: kvm version < 2.4 use non-efi pxe files, and have problems when we
    # load new efi bios files on migration. So this hack is required to allow
    # live migration from qemu-2.2 to qemu-2.4, which is sometimes used when
    # upgrading from proxmox-ve-3.X to proxmox-ve 4.0
    my $version = extract_version($machine_type, kvm_user_version());
    my $needs_old_files = !min_version($version, 2, 4);

    return ($needs_old_files, $machine_type);
}
8284
# Return the size in bytes (as reported by -s) of the OVMF vars file that get_ovmf_files()
# selects for this VM.
#
# $conf    - VM config hash
# $efidisk - optional parsed EFI disk; defaults to the parsed 'efidisk0' entry of $conf, if any
sub get_efivars_size {
    my ($conf, $efidisk) = @_;

    my $arch = get_vm_arch($conf);

    if (!defined($efidisk) && $conf->{efidisk0}) {
        $efidisk = parse_drive('efidisk0', $conf->{efidisk0});
    }

    my $smm = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # only the second returned path (the vars file) is of interest here
    my @ovmf_files = get_ovmf_files($arch, $efidisk, $smm);
    my $vars_path = $ovmf_files[1];

    return -s $vars_path;
}
8294
# Set the 'size' property of the 'efidisk0' entry in $conf to the value returned by
# get_efivars_size(). Does nothing if the config has no 'efidisk0'.
sub update_efidisk_size {
    my ($conf) = @_;

    if (defined($conf->{efidisk0})) {
        my $efidisk = PVE::QemuServer::parse_drive('efidisk0', $conf->{efidisk0});
        $efidisk->{size} = get_efivars_size($conf);
        $conf->{efidisk0} = print_drive($efidisk);
    }

    return;
}
8306
# Set the 'size' property of the 'tpmstate0' entry in $conf to
# PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE.
#
# Does nothing if the config has no 'tpmstate0', mirroring update_efidisk_size(); without this
# guard an undefined value would be parsed and a bogus entry written back.
sub update_tpmstate_size {
    my ($conf) = @_;

    return if !defined($conf->{tpmstate0});

    my $disk = PVE::QemuServer::parse_drive('tpmstate0', $conf->{tpmstate0});
    $disk->{size} = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
    $conf->{tpmstate0} = print_drive($disk);
}
8314
# Allocate a new EFI vars volume on storage $storeid for VM $vmid and fill it with the contents
# of the OVMF vars file selected by get_ovmf_files($arch, $efidisk, $smm).
#
# Returns the list ($volid, $size/1024), where $size is what PVE::Storage::volume_size_info
# reports for the new volume.
sub create_efidisk($$$$$$$) {
    my ($storecfg, $storeid, $vmid, $fmt, $arch, $efidisk, $smm) = @_;

    my (undef, $vars_template) = get_ovmf_files($arch, $efidisk, $smm);

    my $template_bytes = -s $vars_template;
    my $template_kb = PVE::Tools::convert_size($template_bytes, 'b' => 'kb');

    my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $fmt, undef, $template_kb);
    PVE::Storage::activate_volumes($storecfg, [$volid]);

    qemu_img_convert($vars_template, $volid, $template_bytes, undef, 0);

    my $size = PVE::Storage::volume_size_info($storecfg, $volid, 3);

    return ($volid, $size/1024);
}
8330
# Query the IO threads of VM $vmid via QMP 'query-iothreads'.
#
# Returns a hash reference mapping each entry's 'id' to its 'thread-id'.
sub vm_iothreads_list {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, 'query-iothreads');

    my %thread_id_of;
    for my $entry (@$res) {
        $thread_id_of{ $entry->{id} } = $entry->{"thread-id"};
    }

    return \%thread_id_of;
}
8343
# Compute SCSI controller details for a drive, based on the 'scsihw' setting in $conf.
#
# Returns the list ($maxdev, $controller, $controller_prefix):
#   $maxdev            - 7 if scsihw is unset or starts with 'lsi', 1 for
#                        'virtio-scsi-single', 256 otherwise
#   $controller        - int($drive->{index} / $maxdev)
#   $controller_prefix - "virtioscsi" for 'virtio-scsi-single', "scsihw" otherwise
sub scsihw_infos {
    my ($conf, $drive) = @_;

    my $scsihw = $conf->{scsihw};
    my $is_single = $scsihw && $scsihw eq 'virtio-scsi-single';

    my $maxdev;
    if (!$scsihw || $scsihw =~ m/^lsi/) {
        $maxdev = 7;
    } elsif ($is_single) {
        $maxdev = 1;
    } else {
        $maxdev = 256;
    }

    my $controller = int($drive->{index} / $maxdev);
    my $controller_prefix = $is_single ? "virtioscsi" : "scsihw";

    return ($maxdev, $controller, $controller_prefix);
}
8364
# Pick the disk format to use on storage $storeid.
#
# If $format is not given, the format derived from $src_volname (via qemu_img_format) is used
# as hint; without a source volume name the storage's default format is returned. A format
# that is not among the storage's valid formats is replaced by the default format.
sub resolve_dst_disk_format {
    my ($storecfg, $storeid, $src_volname, $format) = @_;
    my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);

    if (!$format) {
        return $defFormat if !$src_volname;

        # if no target format is specified, use the source disk format as hint
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        $format = qemu_img_format($scfg, $src_volname);
    }

    # test if requested format is supported - else use default
    return $format if grep { $_ eq $format } @$validFormats;
    return $defFormat;
}
8384
# NOTE: if this logic changes, please update docs & possibly gui logic
#
# Select the storage for VM state volumes, in this order of preference:
#   1. the 'vmstatestorage' setting from the config
#   2. a shared storage on which the VM has at least one disk
#   3. a local storage on which the VM has at least one disk
#   4. the storage named 'local'
# Within 2. and 3., storages with a 'path' setting (file based) are preferred.
sub find_vmstate_storage {
    my ($conf, $storecfg) = @_;

    return $conf->{vmstatestorage} if $conf->{vmstatestorage};

    my ($shared, $local);

    foreach_storage_used_by_vm($conf, sub {
        my ($sid) = @_;
        my $scfg = PVE::Storage::storage_config($storecfg, $sid);

        # keep the first candidate, but let file based storages override it
        if ($scfg->{shared}) {
            $shared = $sid if !$shared || $scfg->{path};
        } else {
            $local = $sid if !$local || $scfg->{path};
        }
    });

    return $shared // $local // 'local';
}
8408
# Generate a new UUID with the UUID module and return its string form as produced by
# UUID::unparse.
sub generate_uuid {
    my $raw;
    my $text;

    UUID::generate($raw);
    UUID::unparse($raw, $text);

    return $text;
}
8415
# Return a string of the form "uuid=<new UUID>", using generate_uuid().
sub generate_smbios1_uuid {
    my $uuid = generate_uuid();
    return "uuid=$uuid";
}
8419
# Send the QMP command 'nbd-server-stop' to VM $vmid, with a timeout of 25 seconds.
sub nbd_stop {
    my $vmid = shift;

    return mon_cmd($vmid, 'nbd-server-stop', timeout => 25);
}
8425
# Create (or truncate) the reboot trigger file /run/qemu-server/<vmid>.reboot.
# Dies if the file cannot be opened for writing.
sub create_reboot_request {
    my ($vmid) = @_;

    my $trigger_file = "/run/qemu-server/$vmid.reboot";

    open(my $fh, '>', $trigger_file)
        or die "failed to create reboot trigger file: $!\n";
    close($fh);
}
8432
# Remove the reboot trigger file /run/qemu-server/<vmid>.reboot.
#
# Returns the result of unlink(), i.e. a true value if the file was removed and a false value
# if it did not exist. Dies on any other unlink error.
sub clear_reboot_request {
    my ($vmid) = @_;

    my $removed = unlink("/run/qemu-server/$vmid.reboot");

    # a missing file is fine, there simply was no request
    if (!$removed && $! != POSIX::ENOENT) {
        die "could not remove reboot request for $vmid: $!";
    }

    return $removed;
}
8444
# Compute boot indices from a legacy boot string made of single letter entries (the 'legacy'
# value of $bootcfg, or the default from $boot_fmt).
#
# The letter at position N (1-based) gets the base index N*100. Within 'd', every CD-ROM drive
# gets the next free index; within 'c', only the drive named by 'bootdisk' in $conf gets an
# index; within 'n', every configured netX device gets the next free index.
#
# Returns a hash reference mapping device keys to their boot index.
sub bootorder_from_legacy {
    my ($conf, $bootcfg) = @_;

    my $boot = $bootcfg->{legacy} || $boot_fmt->{legacy}->{default};

    my @letters = split(//, $boot);
    my %next_index = map { ($letters[$_] => ($_ + 1) * 100) } 0 .. $#letters;

    my $bootorder = {};

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        if (drive_is_cdrom ($drive, 1)) {
            return if !$next_index{d};
            $bootorder->{$ds} = $next_index{d}++;
        } else {
            return if !$next_index{c};
            # every disk consumes an index, but only the boot disk is actually listed
            $bootorder->{$ds} = $next_index{c}
                if $conf->{bootdisk} && $conf->{bootdisk} eq $ds;
            $next_index{c}++;
        }
    });

    if ($next_index{n}) {
        for my $netname (map { "net$_" } 0 .. $MAX_NETS - 1) {
            next if !$conf->{$netname};
            $bootorder->{$netname} = $next_index{n}++;
        }
    }

    return $bootorder;
}
8484
# Generate default device list for 'boot: order=' property. Matches legacy
# default boot order, but with explicit device names. This is important, since
# the fallback for when neither 'order' nor the old format is specified relies
# on 'bootorder_from_legacy' above, and it would be confusing if this diverges.
#
# Returns an array reference with, in this order and if present: the first hard disk, the
# first CD-ROM drive and the first configured network device.
sub get_default_bootdevices {
    my ($conf) = @_;

    my @devices;

    # first the hard disk (0), then the cdrom (1)
    for my $want_cdrom (0, 1) {
        my $disk = PVE::QemuServer::Drive::resolve_first_disk($conf, $want_cdrom);
        push @devices, $disk if $disk;
    }

    # network
    my $netname = first { $conf->{$_} } map { "net$_" } 0 .. $MAX_NETS - 1;
    push @devices, $netname if defined($netname);

    return \@devices;
}
8512
# Compute the boot index of every boot device from the 'boot' property of $conf.
#
# Without a 'boot' property, or with the legacy format, the result of bootorder_from_legacy()
# is returned. With 'order=', the listed devices get consecutive indices starting at 100.
#
# Returns a hash reference mapping device keys to their boot index.
sub device_bootorder {
    my ($conf) = @_;

    return bootorder_from_legacy($conf) if !defined($conf->{boot});

    my $boot = parse_property_string($boot_fmt, $conf->{boot});

    return bootorder_from_legacy($conf, $boot) if !defined($boot) || $boot->{legacy};

    my $bootorder = {};
    if ($boot->{order}) {
        my $index = 100; # start at 100 to allow user to insert devices before us with -args
        $bootorder->{$_} = $index++ for PVE::Tools::split_list($boot->{order});
    }

    return $bootorder;
}
8532
# Connect to the qmeventd socket and send the vzdump handshake for VM $vmid.
#
# The connection is retried (with a 25ms pause) while it fails with EINTR or EAGAIN, up to 5
# attempts in total. Dies on any other error or when the attempts are used up.
#
# Returns the socket handle, which the caller has to close later on.
sub register_qmeventd_handle {
    my ($vmid) = @_;

    my $peer = "/var/run/qmeventd.sock";

    my $fh;
    my $count = 0;
    until ($fh = IO::Socket::UNIX->new(Peer => $peer, Blocking => 0, Timeout => 1)) {
        $count++;

        die "unable to connect to qmeventd socket (vmid: $vmid) - $!\n"
            if $! != EINTR && $! != EAGAIN;

        die "unable to connect to qmeventd socket (vmid: $vmid) - timeout "
            . "after $count retries\n"
            if $count > 4;

        usleep(25000);
    }

    # send handshake to mark VM as backing up
    print $fh to_json({vzdump => {vmid => "$vmid"}});

    # return handle to be closed later when inhibit is no longer required
    return $fh;
}
8560
8561 # bash completion helper
8562
# Bash completion helper: list the volume IDs of backup archives whose format is 'vma.' plus a
# compressor extension matching PVE::Storage::Plugin::COMPRESSOR_RE.
#
# If the current value $cvalue starts with '<storeid>:', only that storage is listed.
# Returns an array reference of volume IDs.
sub complete_backup_archives {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    # undefined if the value has no storage prefix yet
    my ($storeid) = $cvalue =~ m/^([^:]+):/;

    my $data = PVE::Storage::template_list($cfg, $storeid, 'backup');

    my $res = [];
    for my $items (values %$data) {
        for my $item (@$items) {
            next unless $item->{format} =~ m/^vma\.(${\PVE::Storage::Plugin::COMPRESSOR_RE})$/;
            next unless defined($item->{volid});
            push @$res, $item->{volid};
        }
    }

    return $res;
}
8586
# Return an array reference with the VM IDs reported by vmstatus().
#
# If $running is undefined, all IDs are returned. Otherwise templates are left out and only
# the VMs whose status is 'running' ($running true) or is not 'running' ($running false) are
# returned.
my $complete_vmid_full = sub {
    my ($running) = @_;

    my $idlist = vmstatus();

    my @vmids = grep {
        my $d = $idlist->{$_};
        !defined($running)
            || (
                !$d->{template}
                && ($running ? $d->{status} eq 'running' : $d->{status} ne 'running')
            );
    } keys %$idlist;

    return \@vmids;
};
8606
# Completion helper: all VM IDs, regardless of their status.
sub complete_vmid {
    return $complete_vmid_full->();
}
8610
# Completion helper: IDs of non-template VMs whose status is not 'running'.
sub complete_vmid_stopped {
    return $complete_vmid_full->(0);
}
8614
# Completion helper: IDs of non-template VMs whose status is 'running'.
sub complete_vmid_running {
    return $complete_vmid_full->(1);
}
8618
# Completion helper: IDs of all storages that pass PVE::Storage::storage_check_enabled and
# have the 'images' content type configured.
#
# Returns an array reference of storage IDs.
sub complete_storage {
    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @usable = grep {
        PVE::Storage::storage_check_enabled($cfg, $_, undef, 1)
            && $ids->{$_}->{content}->{images}
    } keys %$ids;

    return \@usable;
}
8633
# Completion helper: IDs of all storages that pass PVE::Storage::storage_check_enabled for the
# target node and have the 'images' content type configured.
#
# The target node is taken from the second element of $all_args.
# Returns an array reference of storage IDs.
sub complete_migration_storage {
    my ($cmd, $param, $current_value, $all_args) = @_;

    my $targetnode = $all_args->[1];

    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @usable = grep {
        PVE::Storage::storage_check_enabled($cfg, $_, $targetnode, 1)
            && $ids->{$_}->{content}->{images}
    } keys %$ids;

    return \@usable;
}
8651
# Check via QMP 'query-status' whether VM $vmid is paused.
#
# The states 'paused' and 'prelaunch' count as paused; 'suspended' only counts if
# $include_suspended is set. If the config does not exist on this node or the query fails, a
# warning is emitted and a false value is returned.
sub vm_is_paused {
    my ($vmid, $include_suspended) = @_;

    my $qmpstatus = eval {
        PVE::QemuConfig::assert_config_exists_on_node($vmid);
        mon_cmd($vmid, "query-status");
    };
    warn "$@\n" if $@;

    return $qmpstatus if !$qmpstatus;

    my $status = $qmpstatus->{status};

    return $status eq "paused"
        || $status eq "prelaunch"
        || ($include_suspended && $status eq "suspended");
}
8665
# Assert that the storage of volume $vol has the volume's content type (as reported by
# PVE::Storage::parse_volname) enabled in its 'content' setting.
#
# Returns 1 on success, dies otherwise.
sub check_volume_storage_type {
    my ($storecfg, $vol) = @_;

    my ($storeid) = PVE::Storage::parse_volume_id($vol);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
    my ($vtype) = PVE::Storage::parse_volname($storecfg, $vol);

    if (!$scfg->{content}->{$vtype}) {
        die "storage '$storeid' does not support content-type '$vtype'\n";
    }

    return 1;
}
8678
# Add the MAC address of every netX device in $conf to the forwarding database for its tap
# interface "tap<vmid>i<X>".
#
# Devices that cannot be parsed are skipped. Devices without a MAC address or without a bridge
# are skipped with a warning (for a missing MAC only if the 'learning' flag of the bridge port
# reads as false). With SDN available, the SDN zone helper is used; otherwise the entry is
# only added if the bridge has a 'bridge' directory in sysfs.
sub add_nets_bridge_fdb {
    my ($conf, $vmid) = @_;

    for my $opt (keys %$conf) {
        my ($index) = $opt =~ m/^net(\d+)$/ or next;
        my $iface = "tap${vmid}i${index}";

        # NOTE: expect setups with learning off to *not* use auto-random-generation of MAC on start
        my $net = parse_net($conf->{$opt}, 1) or next;
        my ($mac, $bridge) = $net->@{qw(macaddr bridge)};

        if (!$mac) {
            my $learning = file_read_firstline("/sys/class/net/$iface/brport/learning");
            log_warn("MAC learning disabled, but vNIC '$iface' has no static MAC to add to forwarding DB!")
                if !$learning;
            next;
        }

        if (!$bridge) {
            log_warn("Interface '$iface' not attached to any bridge.");
            next;
        }

        if ($have_sdn) {
            PVE::Network::SDN::Zones::add_bridge_fdb($iface, $mac, $bridge);
        } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
            PVE::Network::add_bridge_fdb($iface, $mac);
        }
    }
}
8707
# Remove the MAC address of every netX device in $conf from the forwarding database for its
# tap interface "tap<vmid>i<X>".
#
# Devices that cannot be parsed, have no MAC address or are not attached to a bridge are
# skipped. With SDN available, the SDN zone helper is used; otherwise the entry is only
# removed if the bridge has a 'bridge' directory in sysfs.
sub del_nets_bridge_fdb {
    my ($conf, $vmid) = @_;

    for my $opt (keys %$conf) {
        next if $opt !~ m/^net(\d+)$/;
        my $iface = "tap${vmid}i$1";

        my $net = parse_net($conf->{$opt}) or next;
        my $mac = $net->{macaddr} or next;

        # add_nets_bridge_fdb() skips devices without a bridge, so there is nothing to remove
        # for them; this also avoids using an undefined bridge name below
        my $bridge = $net->{bridge} or next;

        if ($have_sdn) {
            PVE::Network::SDN::Zones::del_bridge_fdb($iface, $mac, $bridge);
        } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
            PVE::Network::del_bridge_fdb($iface, $mac);
        }
    }
}
8726
# For every netX device in $conf, call PVE::Network::SDN::Vnets::add_next_free_cidr with the
# device's bridge and MAC address, the VM name and $vmid.
#
# Does nothing without SDN support. Errors of the SDN call are only warned about.
sub create_ifaces_ipams_ips {
    my ($conf, $vmid) = @_;

    return if !$have_sdn;

    for my $opt (grep { m/^net(\d+)$/ } keys %$conf) {
        my $net = PVE::QemuServer::parse_net($conf->{$opt});

        eval {
            PVE::Network::SDN::Vnets::add_next_free_cidr(
                $net->{bridge}, $conf->{name}, $net->{macaddr}, $vmid, undef, 1);
        };
        warn $@ if $@;
    }
}
8741
# For every netX device in $conf, call PVE::Network::SDN::Vnets::del_ips_from_mac with the
# device's bridge and MAC address and the VM name.
#
# Does nothing without SDN support. Errors of the SDN call are only warned about.
sub delete_ifaces_ipams_ips {
    my ($conf, $vmid) = @_;

    return if !$have_sdn;

    for my $opt (grep { m/^net(\d+)$/ } keys %$conf) {
        my $net = PVE::QemuServer::parse_net($conf->{$opt});

        eval {
            PVE::Network::SDN::Vnets::del_ips_from_mac(
                $net->{bridge}, $net->{macaddr}, $conf->{name});
        };
        warn $@ if $@;
    }
}
8755
8756 1;