]> git.proxmox.com Git - qemu-server.git/blob - PVE/QemuServer.pm
net devs: register vNIC mac to FDB on start/resume
[qemu-server.git] / PVE / QemuServer.pm
1 package PVE::QemuServer;
2
3 use strict;
4 use warnings;
5
6 use Cwd 'abs_path';
7 use Digest::SHA;
8 use Fcntl ':flock';
9 use Fcntl;
10 use File::Basename;
11 use File::Copy qw(copy);
12 use File::Path;
13 use File::stat;
14 use Getopt::Long;
15 use IO::Dir;
16 use IO::File;
17 use IO::Handle;
18 use IO::Select;
19 use IO::Socket::UNIX;
20 use IPC::Open3;
21 use JSON;
22 use MIME::Base64;
23 use POSIX;
24 use Storable qw(dclone);
25 use Time::HiRes qw(gettimeofday usleep);
26 use URI::Escape;
27 use UUID;
28
29 use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file);
30 use PVE::CGroup;
31 use PVE::CpuSet;
32 use PVE::DataCenterConfig;
33 use PVE::Exception qw(raise raise_param_exc);
34 use PVE::Format qw(render_duration render_bytes);
35 use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
36 use PVE::INotify;
37 use PVE::JSONSchema qw(get_standard_option parse_property_string);
38 use PVE::ProcFSTools;
39 use PVE::PBSClient;
40 use PVE::RESTEnvironment qw(log_warn);
41 use PVE::RPCEnvironment;
42 use PVE::Storage;
43 use PVE::SysFSTools;
44 use PVE::Systemd;
45 use PVE::Tools qw(run_command file_read_firstline file_get_contents dir_glob_foreach get_host_arch $IPV6RE);
46
47 use PVE::QMPClient;
48 use PVE::QemuConfig;
49 use PVE::QemuServer::Helpers qw(min_version config_aware_timeout windows_version);
50 use PVE::QemuServer::Cloudinit;
51 use PVE::QemuServer::CGroup;
52 use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options);
53 use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom drive_is_read_only parse_drive print_drive);
54 use PVE::QemuServer::Machine;
55 use PVE::QemuServer::Memory;
56 use PVE::QemuServer::Monitor qw(mon_cmd);
57 use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
58 use PVE::QemuServer::USB qw(parse_usb_device);
59
# Probe for the SDN zones module at load time. The require is wrapped in eval
# so a load failure is tolerated; $have_sdn is 1 on success and undef otherwise.
my $have_sdn = eval {
    require PVE::Network::SDN::Zones;
    1;
};
65
66 my $EDK2_FW_BASE = '/usr/share/pve-edk2-firmware/';
67 my $OVMF = {
68 x86_64 => {
69 '4m-no-smm' => [
70 "$EDK2_FW_BASE/OVMF_CODE_4M.fd",
71 "$EDK2_FW_BASE/OVMF_VARS_4M.fd",
72 ],
73 '4m-no-smm-ms' => [
74 "$EDK2_FW_BASE/OVMF_CODE_4M.fd",
75 "$EDK2_FW_BASE/OVMF_VARS_4M.ms.fd",
76 ],
77 '4m' => [
78 "$EDK2_FW_BASE/OVMF_CODE_4M.secboot.fd",
79 "$EDK2_FW_BASE/OVMF_VARS_4M.fd",
80 ],
81 '4m-ms' => [
82 "$EDK2_FW_BASE/OVMF_CODE_4M.secboot.fd",
83 "$EDK2_FW_BASE/OVMF_VARS_4M.ms.fd",
84 ],
85 default => [
86 "$EDK2_FW_BASE/OVMF_CODE.fd",
87 "$EDK2_FW_BASE/OVMF_VARS.fd",
88 ],
89 },
90 aarch64 => {
91 default => [
92 "$EDK2_FW_BASE/AAVMF_CODE.fd",
93 "$EDK2_FW_BASE/AAVMF_VARS.fd",
94 ],
95 },
96 };
97
98 my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
99
100 # Note about locking: we use flock on the config file to protect against concurrent actions.
101 # Additionally, we have a 'lock' setting in the config file. This can be set to 'migrate',
102 # 'backup', 'snapshot' or 'rollback'. Most actions are not allowed when such a lock is set.
103 # But you can ignore this kind of lock with the --skiplock flag.
104
105 cfs_register_file('/qemu-server/',
106 \&parse_vm_config,
107 \&write_vm_config);
108
109 PVE::JSONSchema::register_standard_option('pve-qm-stateuri', {
110 description => "Some command save/restore state from this location.",
111 type => 'string',
112 maxLength => 128,
113 optional => 1,
114 });
115
116 PVE::JSONSchema::register_standard_option('pve-qemu-machine', {
117 description => "Specifies the Qemu machine type.",
118 type => 'string',
119 pattern => '(pc|pc(-i440fx)?-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|q35|pc-q35-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|virt(?:-\d+(\.\d+)+)?(\+pve\d+)?)',
120 maxLength => 40,
121 optional => 1,
122 });
123
124 #no warnings 'redefine';
125
# Cache for nodename(); stays undef until the first successful lookup.
my $nodename_cache;

# Returns the result of PVE::INotify::nodename(). The lookup is only repeated
# while the cached value is still undefined.
sub nodename {
    return $nodename_cache if defined($nodename_cache);

    $nodename_cache = PVE::INotify::nodename();
    return $nodename_cache;
}
131
132 my $watchdog_fmt = {
133 model => {
134 default_key => 1,
135 type => 'string',
136 enum => [qw(i6300esb ib700)],
137 description => "Watchdog type to emulate.",
138 default => 'i6300esb',
139 optional => 1,
140 },
141 action => {
142 type => 'string',
143 enum => [qw(reset shutdown poweroff pause debug none)],
144 description => "The action to perform if after activation the guest fails to poll the watchdog in time.",
145 optional => 1,
146 },
147 };
148 PVE::JSONSchema::register_format('pve-qm-watchdog', $watchdog_fmt);
149
150 my $agent_fmt = {
151 enabled => {
152 description => "Enable/disable communication with a Qemu Guest Agent (QGA) running in the VM.",
153 type => 'boolean',
154 default => 0,
155 default_key => 1,
156 },
157 fstrim_cloned_disks => {
158 description => "Run fstrim after moving a disk or migrating the VM.",
159 type => 'boolean',
160 optional => 1,
161 default => 0
162 },
163 type => {
164 description => "Select the agent type",
165 type => 'string',
166 default => 'virtio',
167 optional => 1,
168 enum => [qw(virtio isa)],
169 },
170 };
171
172 my $vga_fmt = {
173 type => {
174 description => "Select the VGA type.",
175 type => 'string',
176 default => 'std',
177 optional => 1,
178 default_key => 1,
179 enum => [qw(cirrus qxl qxl2 qxl3 qxl4 none serial0 serial1 serial2 serial3 std virtio virtio-gl vmware)],
180 },
181 memory => {
182 description => "Sets the VGA memory (in MiB). Has no effect with serial display.",
183 type => 'integer',
184 optional => 1,
185 minimum => 4,
186 maximum => 512,
187 },
188 };
189
190 my $ivshmem_fmt = {
191 size => {
192 type => 'integer',
193 minimum => 1,
194 description => "The size of the file in MB.",
195 },
196 name => {
197 type => 'string',
198 pattern => '[a-zA-Z0-9\-]+',
199 optional => 1,
200 format_description => 'string',
201 description => "The name of the file. Will be prefixed with 'pve-shm-'. Default is the VMID. Will be deleted when the VM is stopped.",
202 },
203 };
204
205 my $audio_fmt = {
206 device => {
207 type => 'string',
208 enum => [qw(ich9-intel-hda intel-hda AC97)],
209 description => "Configure an audio device."
210 },
211 driver => {
212 type => 'string',
213 enum => ['spice', 'none'],
214 default => 'spice',
215 optional => 1,
216 description => "Driver backend for the audio device."
217 },
218 };
219
220 my $spice_enhancements_fmt = {
221 foldersharing => {
222 type => 'boolean',
223 optional => 1,
224 default => '0',
225 description => "Enable folder sharing via SPICE. Needs Spice-WebDAV daemon installed in the VM."
226 },
227 videostreaming => {
228 type => 'string',
229 enum => ['off', 'all', 'filter'],
230 default => 'off',
231 optional => 1,
232 description => "Enable video streaming. Uses compression for detected video streams."
233 },
234 };
235
236 my $rng_fmt = {
237 source => {
238 type => 'string',
239 enum => ['/dev/urandom', '/dev/random', '/dev/hwrng'],
240 default_key => 1,
241 description => "The file on the host to gather entropy from. In most cases '/dev/urandom'"
242 ." should be preferred over '/dev/random' to avoid entropy-starvation issues on the"
243 ." host. Using urandom does *not* decrease security in any meaningful way, as it's"
244 ." still seeded from real entropy, and the bytes provided will most likely be mixed"
245 ." with real entropy on the guest as well. '/dev/hwrng' can be used to pass through"
246 ." a hardware RNG from the host.",
247 },
248 max_bytes => {
249 type => 'integer',
250 description => "Maximum bytes of entropy allowed to get injected into the guest every"
251 ." 'period' milliseconds. Prefer a lower value when using '/dev/random' as source. Use"
252 ." `0` to disable limiting (potentially dangerous!).",
253 optional => 1,
254
255 # default is 1 KiB/s, provides enough entropy to the guest to avoid boot-starvation issues
256 # (e.g. systemd etc...) while allowing no chance of overwhelming the host, provided we're
257 # reading from /dev/urandom
258 default => 1024,
259 },
260 period => {
261 type => 'integer',
262 description => "Every 'period' milliseconds the entropy-injection quota is reset, allowing"
263 ." the guest to retrieve another 'max_bytes' of entropy.",
264 optional => 1,
265 default => 1000,
266 },
267 };
268
269 my $meta_info_fmt = {
270 'ctime' => {
271 type => 'integer',
272 description => "The guest creation timestamp as UNIX epoch time",
273 minimum => 0,
274 optional => 1,
275 },
276 'creation-qemu' => {
277 type => 'string',
278 description => "The QEMU (machine) version from the time this VM was created.",
279 pattern => '\d+(\.\d+)+',
280 optional => 1,
281 },
282 };
283
284 my $confdesc = {
285 onboot => {
286 optional => 1,
287 type => 'boolean',
288 description => "Specifies whether a VM will be started during system bootup.",
289 default => 0,
290 },
291 autostart => {
292 optional => 1,
293 type => 'boolean',
294 description => "Automatic restart after crash (currently ignored).",
295 default => 0,
296 },
297 hotplug => {
298 optional => 1,
299 type => 'string', format => 'pve-hotplug-features',
300 description => "Selectively enable hotplug features. This is a comma separated list of"
301 ." hotplug features: 'network', 'disk', 'cpu', 'memory', 'usb' and 'cloudinit'. Use '0' to disable"
302 ." hotplug completely. Using '1' as value is an alias for the default `network,disk,usb`."
303 ." USB hotplugging is possible for guests with machine version >= 7.1 and ostype l26 or"
304 ." windows > 7.",
305 default => 'network,disk,usb',
306 },
307 reboot => {
308 optional => 1,
309 type => 'boolean',
310 description => "Allow reboot. If set to '0' the VM exit on reboot.",
311 default => 1,
312 },
313 lock => {
314 optional => 1,
315 type => 'string',
316 description => "Lock/unlock the VM.",
317 enum => [qw(backup clone create migrate rollback snapshot snapshot-delete suspending suspended)],
318 },
319 cpulimit => {
320 optional => 1,
321 type => 'number',
322 description => "Limit of CPU usage.",
323 verbose_description => "Limit of CPU usage.\n\nNOTE: If the computer has 2 CPUs, it has"
324 ." total of '2' CPU time. Value '0' indicates no CPU limit.",
325 minimum => 0,
326 maximum => 128,
327 default => 0,
328 },
329 cpuunits => {
330 optional => 1,
331 type => 'integer',
332 description => "CPU weight for a VM, will be clamped to [1, 10000] in cgroup v2.",
333 verbose_description => "CPU weight for a VM. Argument is used in the kernel fair scheduler."
334 ." The larger the number is, the more CPU time this VM gets. Number is relative to"
335 ." weights of all the other running VMs.",
336 minimum => 1,
337 maximum => 262144,
338 default => 'cgroup v1: 1024, cgroup v2: 100',
339 },
340 memory => {
341 optional => 1,
342 type => 'integer',
343 description => "Amount of RAM for the VM in MB. This is the maximum available memory when"
344 ." you use the balloon device.",
345 minimum => 16,
346 default => 512,
347 },
348 balloon => {
349 optional => 1,
350 type => 'integer',
351 description => "Amount of target RAM for the VM in MB. Using zero disables the ballon driver.",
352 minimum => 0,
353 },
354 shares => {
355 optional => 1,
356 type => 'integer',
357 description => "Amount of memory shares for auto-ballooning. The larger the number is, the"
358 ." more memory this VM gets. Number is relative to weights of all other running VMs."
359 ." Using zero disables auto-ballooning. Auto-ballooning is done by pvestatd.",
360 minimum => 0,
361 maximum => 50000,
362 default => 1000,
363 },
364 keyboard => {
365 optional => 1,
366 type => 'string',
367 description => "Keyboard layout for VNC server. This option is generally not required and"
368 ." is often better handled from within the guest OS.",
369 enum => PVE::Tools::kvmkeymaplist(),
370 default => undef,
371 },
372 name => {
373 optional => 1,
374 type => 'string', format => 'dns-name',
375 description => "Set a name for the VM. Only used on the configuration web interface.",
376 },
377 scsihw => {
378 optional => 1,
379 type => 'string',
380 description => "SCSI controller model",
381 enum => [qw(lsi lsi53c810 virtio-scsi-pci virtio-scsi-single megasas pvscsi)],
382 default => 'lsi',
383 },
384 description => {
385 optional => 1,
386 type => 'string',
387 description => "Description for the VM. Shown in the web-interface VM's summary."
388 ." This is saved as comment inside the configuration file.",
389 maxLength => 1024 * 8,
390 },
391 ostype => {
392 optional => 1,
393 type => 'string',
394 enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 win10 win11 l24 l26 solaris)],
395 description => "Specify guest operating system.",
396 verbose_description => <<EODESC,
397 Specify guest operating system. This is used to enable special
398 optimization/features for specific operating systems:
399
400 [horizontal]
401 other;; unspecified OS
402 wxp;; Microsoft Windows XP
403 w2k;; Microsoft Windows 2000
404 w2k3;; Microsoft Windows 2003
405 w2k8;; Microsoft Windows 2008
406 wvista;; Microsoft Windows Vista
407 win7;; Microsoft Windows 7
408 win8;; Microsoft Windows 8/2012/2012r2
409 win10;; Microsoft Windows 10/2016/2019
410 win11;; Microsoft Windows 11/2022
411 l24;; Linux 2.4 Kernel
412 l26;; Linux 2.6 - 5.X Kernel
413 solaris;; Solaris/OpenSolaris/OpenIndiania kernel
414 EODESC
415 },
416 boot => {
417 optional => 1,
418 type => 'string', format => 'pve-qm-boot',
419 description => "Specify guest boot order. Use the 'order=' sub-property as usage with no"
420 ." key or 'legacy=' is deprecated.",
421 },
422 bootdisk => {
423 optional => 1,
424 type => 'string', format => 'pve-qm-bootdisk',
425 description => "Enable booting from specified disk. Deprecated: Use 'boot: order=foo;bar' instead.",
426 pattern => '(ide|sata|scsi|virtio)\d+',
427 },
428 smp => {
429 optional => 1,
430 type => 'integer',
431 description => "The number of CPUs. Please use option -sockets instead.",
432 minimum => 1,
433 default => 1,
434 },
435 sockets => {
436 optional => 1,
437 type => 'integer',
438 description => "The number of CPU sockets.",
439 minimum => 1,
440 default => 1,
441 },
442 cores => {
443 optional => 1,
444 type => 'integer',
445 description => "The number of cores per socket.",
446 minimum => 1,
447 default => 1,
448 },
449 numa => {
450 optional => 1,
451 type => 'boolean',
452 description => "Enable/disable NUMA.",
453 default => 0,
454 },
455 hugepages => {
456 optional => 1,
457 type => 'string',
458 description => "Enable/disable hugepages memory.",
459 enum => [qw(any 2 1024)],
460 },
461 keephugepages => {
462 optional => 1,
463 type => 'boolean',
464 default => 0,
465 description => "Use together with hugepages. If enabled, hugepages will not not be deleted"
466 ." after VM shutdown and can be used for subsequent starts.",
467 },
468 vcpus => {
469 optional => 1,
470 type => 'integer',
471 description => "Number of hotplugged vcpus.",
472 minimum => 1,
473 default => 0,
474 },
475 acpi => {
476 optional => 1,
477 type => 'boolean',
478 description => "Enable/disable ACPI.",
479 default => 1,
480 },
481 agent => {
482 optional => 1,
483 description => "Enable/disable communication with the Qemu Guest Agent and its properties.",
484 type => 'string',
485 format => $agent_fmt,
486 },
487 kvm => {
488 optional => 1,
489 type => 'boolean',
490 description => "Enable/disable KVM hardware virtualization.",
491 default => 1,
492 },
493 tdf => {
494 optional => 1,
495 type => 'boolean',
496 description => "Enable/disable time drift fix.",
497 default => 0,
498 },
499 localtime => {
500 optional => 1,
501 type => 'boolean',
502 description => "Set the real time clock (RTC) to local time. This is enabled by default if"
503 ." the `ostype` indicates a Microsoft Windows OS.",
504 },
505 freeze => {
506 optional => 1,
507 type => 'boolean',
508 description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
509 },
510 vga => {
511 optional => 1,
512 type => 'string', format => $vga_fmt,
513 description => "Configure the VGA hardware.",
514 verbose_description => "Configure the VGA Hardware. If you want to use high resolution"
515 ." modes (>= 1280x1024x16) you may need to increase the vga memory option. Since QEMU"
516 ." 2.9 the default VGA display type is 'std' for all OS types besides some Windows"
517 ." versions (XP and older) which use 'cirrus'. The 'qxl' option enables the SPICE"
518 ." display server. For win* OS you can select how many independent displays you want,"
519 ." Linux guests can add displays them self.\nYou can also run without any graphic card,"
520 ." using a serial device as terminal.",
521 },
522 watchdog => {
523 optional => 1,
524 type => 'string', format => 'pve-qm-watchdog',
525 description => "Create a virtual hardware watchdog device.",
526 verbose_description => "Create a virtual hardware watchdog device. Once enabled (by a guest"
527 ." action), the watchdog must be periodically polled by an agent inside the guest or"
528 ." else the watchdog will reset the guest (or execute the respective action specified)",
529 },
530 startdate => {
531 optional => 1,
532 type => 'string',
533 typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
534 description => "Set the initial date of the real time clock. Valid format for date are:"
535 ."'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
536 pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
537 default => 'now',
538 },
539 startup => get_standard_option('pve-startup-order'),
540 template => {
541 optional => 1,
542 type => 'boolean',
543 description => "Enable/disable Template.",
544 default => 0,
545 },
546 args => {
547 optional => 1,
548 type => 'string',
549 description => "Arbitrary arguments passed to kvm.",
550 verbose_description => <<EODESCR,
551 Arbitrary arguments passed to kvm, for example:
552
553 args: -no-reboot -no-hpet
554
555 NOTE: this option is for experts only.
556 EODESCR
557 },
558 tablet => {
559 optional => 1,
560 type => 'boolean',
561 default => 1,
562 description => "Enable/disable the USB tablet device.",
563 verbose_description => "Enable/disable the USB tablet device. This device is usually needed"
564 ." to allow absolute mouse positioning with VNC. Else the mouse runs out of sync with"
565 ." normal VNC clients. If you're running lots of console-only guests on one host, you"
566 ." may consider disabling this to save some context switches. This is turned off by"
567 ." default if you use spice (`qm set <vmid> --vga qxl`).",
568 },
569 migrate_speed => {
570 optional => 1,
571 type => 'integer',
572 description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
573 minimum => 0,
574 default => 0,
575 },
576 migrate_downtime => {
577 optional => 1,
578 type => 'number',
579 description => "Set maximum tolerated downtime (in seconds) for migrations.",
580 minimum => 0,
581 default => 0.1,
582 },
583 cdrom => {
584 optional => 1,
585 type => 'string', format => 'pve-qm-ide',
586 typetext => '<volume>',
587 description => "This is an alias for option -ide2",
588 },
589 cpu => {
590 optional => 1,
591 description => "Emulated CPU type.",
592 type => 'string',
593 format => 'pve-vm-cpu-conf',
594 },
595 parent => get_standard_option('pve-snapshot-name', {
596 optional => 1,
597 description => "Parent snapshot name. This is used internally, and should not be modified.",
598 }),
599 snaptime => {
600 optional => 1,
601 description => "Timestamp for snapshots.",
602 type => 'integer',
603 minimum => 0,
604 },
605 vmstate => {
606 optional => 1,
607 type => 'string', format => 'pve-volume-id',
608 description => "Reference to a volume which stores the VM state. This is used internally"
609 ." for snapshots.",
610 },
611 vmstatestorage => get_standard_option('pve-storage-id', {
612 description => "Default storage for VM state volumes/files.",
613 optional => 1,
614 }),
615 runningmachine => get_standard_option('pve-qemu-machine', {
616 description => "Specifies the QEMU machine type of the running vm. This is used internally"
617 ." for snapshots.",
618 }),
619 runningcpu => {
620 description => "Specifies the QEMU '-cpu' parameter of the running vm. This is used"
621 ." internally for snapshots.",
622 optional => 1,
623 type => 'string',
624 pattern => $PVE::QemuServer::CPUConfig::qemu_cmdline_cpu_re,
625 format_description => 'QEMU -cpu parameter'
626 },
627 machine => get_standard_option('pve-qemu-machine'),
628 arch => {
629 description => "Virtual processor architecture. Defaults to the host.",
630 optional => 1,
631 type => 'string',
632 enum => [qw(x86_64 aarch64)],
633 },
634 smbios1 => {
635 description => "Specify SMBIOS type 1 fields.",
636 type => 'string', format => 'pve-qm-smbios1',
637 maxLength => 512,
638 optional => 1,
639 },
640 protection => {
641 optional => 1,
642 type => 'boolean',
643 description => "Sets the protection flag of the VM. This will disable the remove VM and"
644 ." remove disk operations.",
645 default => 0,
646 },
647 bios => {
648 optional => 1,
649 type => 'string',
650 enum => [ qw(seabios ovmf) ],
651 description => "Select BIOS implementation.",
652 default => 'seabios',
653 },
654 vmgenid => {
655 type => 'string',
656 pattern => '(?:[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}|[01])',
657 format_description => 'UUID',
658 description => "Set VM Generation ID. Use '1' to autogenerate on create or update, pass '0'"
659 ." to disable explicitly.",
660 verbose_description => "The VM generation ID (vmgenid) device exposes a 128-bit integer"
661 ." value identifier to the guest OS. This allows to notify the guest operating system"
662 ." when the virtual machine is executed with a different configuration (e.g. snapshot"
663 ." execution or creation from a template). The guest operating system notices the"
664 ." change, and is then able to react as appropriate by marking its copies of"
665 ." distributed databases as dirty, re-initializing its random number generator, etc.\n"
666 ."Note that auto-creation only works when done through API/CLI create or update methods"
667 .", but not when manually editing the config file.",
668 default => "1 (autogenerated)",
669 optional => 1,
670 },
671 hookscript => {
672 type => 'string',
673 format => 'pve-volume-id',
674 optional => 1,
675 description => "Script that will be executed during various steps in the vms lifetime.",
676 },
677 ivshmem => {
678 type => 'string',
679 format => $ivshmem_fmt,
680 description => "Inter-VM shared memory. Useful for direct communication between VMs, or to"
681 ." the host.",
682 optional => 1,
683 },
684 audio0 => {
685 type => 'string',
686 format => $audio_fmt,
687 description => "Configure a audio device, useful in combination with QXL/Spice.",
688 optional => 1
689 },
690 spice_enhancements => {
691 type => 'string',
692 format => $spice_enhancements_fmt,
693 description => "Configure additional enhancements for SPICE.",
694 optional => 1
695 },
696 tags => {
697 type => 'string', format => 'pve-tag-list',
698 description => 'Tags of the VM. This is only meta information.',
699 optional => 1,
700 },
701 rng0 => {
702 type => 'string',
703 format => $rng_fmt,
704 description => "Configure a VirtIO-based Random Number Generator.",
705 optional => 1,
706 },
707 meta => {
708 type => 'string',
709 format => $meta_info_fmt,
710 description => "Some (read-only) meta-information about this guest.",
711 optional => 1,
712 },
713 affinity => {
714 type => 'string', format => 'pve-cpuset',
715 description => "List of host cores used to execute guest processes.",
716 optional => 1,
717 },
718 };
719
# Property-string format for the 'cicustom' option. Every sub-property is
# optional and references (as a 'pve-volume-id') a custom file that is handed
# to the VM via cloud-init.
my $cicustom_fmt = {
    meta => {
        type => 'string',
        optional => 1,
        # Two single-quoted parts joined with '.': a double quote inside a
        # single-quoted string is a literal character, so the continuation must
        # close and re-open with single quotes.
        description => 'Specify a custom file containing all meta data passed to the VM via'
            .' cloud-init. This is provider specific meaning configdrive2 and nocloud differ.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    network => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all network data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    user => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all user data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    vendor => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all vendor data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
};
PVE::JSONSchema::register_format('pve-qm-cicustom', $cicustom_fmt);
755
# Schema of the cloud-init related VM config options. The entries are merged
# into $confdesc further down in this file.
my $confdesc_cloudinit = {
    citype => {
        optional => 1,
        type => 'string',
        description => 'Specifies the cloud-init configuration format. The default depends on the'
            .' configured operating system type (`ostype`). We use the `nocloud` format for Linux,'
            .' and `configdrive2` for windows.',
        enum => ['configdrive2', 'nocloud', 'opennebula'],
    },
    ciuser => {
        optional => 1,
        type => 'string',
        description => "cloud-init: User name to change ssh keys and password for instead of the"
            ." image's configured default user.",
    },
    cipassword => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Password to assign the user. Using this is generally not'
            .' recommended. Use ssh keys instead. Also note that older cloud-init versions do not'
            .' support hashed passwords.',
    },
    cicustom => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Specify custom files to replace the automatically generated'
            .' ones at start.',
        format => 'pve-qm-cicustom',
    },
    searchdomain => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Sets DNS search domains for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    nameserver => {
        optional => 1,
        type => 'string', format => 'address-list',
        description => 'cloud-init: Sets DNS server IP address for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    sshkeys => {
        optional => 1,
        type => 'string',
        format => 'urlencoded',
        description => "cloud-init: Setup public SSH keys (one key per line, OpenSSH format).",
    },
};
806
807 # what about other qemu settings ?
808 #cpu => 'string',
809 #machine => 'string',
810 #fda => 'file',
811 #fdb => 'file',
812 #mtdblock => 'file',
813 #sd => 'file',
814 #pflash => 'file',
815 #snapshot => 'bool',
816 #bootp => 'file',
817 ##tftp => 'dir',
818 ##smb => 'dir',
819 #kernel => 'file',
820 #append => 'string',
821 #initrd => 'file',
822 ##soundhw => 'string',
823
# Publish every option currently present in $confdesc as a standard option
# named "pve-qm-<key>".
for my $opt (keys %$confdesc) {
    PVE::JSONSchema::register_standard_option("pve-qm-$opt", $confdesc->{$opt});
}
827
828 my $MAX_USB_DEVICES = 14;
829 my $MAX_NETS = 32;
830 my $MAX_SERIAL_PORTS = 4;
831 my $MAX_PARALLEL_PORTS = 3;
832 my $MAX_NUMA = 8;
833
834 my $numa_fmt = {
835 cpus => {
836 type => "string",
837 pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
838 description => "CPUs accessing this NUMA node.",
839 format_description => "id[-id];...",
840 },
841 memory => {
842 type => "number",
843 description => "Amount of memory this NUMA node provides.",
844 optional => 1,
845 },
846 hostnodes => {
847 type => "string",
848 pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
849 description => "Host NUMA nodes to use.",
850 format_description => "id[-id];...",
851 optional => 1,
852 },
853 policy => {
854 type => 'string',
855 enum => [qw(preferred bind interleave)],
856 description => "NUMA allocation policy.",
857 optional => 1,
858 },
859 };
860 PVE::JSONSchema::register_format('pve-qm-numanode', $numa_fmt);
861 my $numadesc = {
862 optional => 1,
863 type => 'string', format => $numa_fmt,
864 description => "NUMA topology.",
865 };
866 PVE::JSONSchema::register_standard_option("pve-qm-numanode", $numadesc);
867
# Add the numa0 .. numa<$MAX_NUMA - 1> options; they all share one descriptor.
for my $node (0 .. $MAX_NUMA - 1) {
    $confdesc->{"numa$node"} = $numadesc;
}
871
872 my $nic_model_list = [
873 'e1000',
874 'e1000-82540em',
875 'e1000-82544gc',
876 'e1000-82545em',
877 'e1000e',
878 'i82551',
879 'i82557b',
880 'i82559er',
881 'ne2k_isa',
882 'ne2k_pci',
883 'pcnet',
884 'rtl8139',
885 'virtio',
886 'vmxnet3',
887 ];
888 my $nic_model_list_txt = join(' ', sort @$nic_model_list);
889
890 my $net_fmt_bridge_descr = <<__EOD__;
891 Bridge to attach the network device to. The Proxmox VE standard bridge
892 is called 'vmbr0'.
893
894 If you do not specify a bridge, we create a kvm user (NATed) network
895 device, which provides DHCP and DNS services. The following addresses
896 are used:
897
898 10.0.2.2 Gateway
899 10.0.2.3 DNS Server
900 10.0.2.4 SMB Server
901
902 The DHCP server assign addresses to the guest starting from 10.0.2.15.
903 __EOD__
904
905 my $net_fmt = {
906 macaddr => get_standard_option('mac-addr', {
907 description => "MAC address. That address must be unique withing your network. This is"
908 ." automatically generated if not specified.",
909 }),
910 model => {
911 type => 'string',
912 description => "Network Card Model. The 'virtio' model provides the best performance with"
913 ." very low CPU overhead. If your guest does not support this driver, it is usually"
914 ." best to use 'e1000'.",
915 enum => $nic_model_list,
916 default_key => 1,
917 },
918 (map { $_ => { keyAlias => 'model', alias => 'macaddr' }} @$nic_model_list),
919 bridge => get_standard_option('pve-bridge-id', {
920 description => $net_fmt_bridge_descr,
921 optional => 1,
922 }),
923 queues => {
924 type => 'integer',
925 minimum => 0, maximum => 16,
926 description => 'Number of packet queues to be used on the device.',
927 optional => 1,
928 },
929 rate => {
930 type => 'number',
931 minimum => 0,
932 description => "Rate limit in mbps (megabytes per second) as floating point number.",
933 optional => 1,
934 },
935 tag => {
936 type => 'integer',
937 minimum => 1, maximum => 4094,
938 description => 'VLAN tag to apply to packets on this interface.',
939 optional => 1,
940 },
941 trunks => {
942 type => 'string',
943 pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
944 description => 'VLAN trunks to pass through this interface.',
945 format_description => 'vlanid[;vlanid...]',
946 optional => 1,
947 },
948 firewall => {
949 type => 'boolean',
950 description => 'Whether this interface should be protected by the firewall.',
951 optional => 1,
952 },
953 link_down => {
954 type => 'boolean',
955 description => 'Whether this interface should be disconnected (like pulling the plug).',
956 optional => 1,
957 },
958 mtu => {
959 type => 'integer',
960 minimum => 1, maximum => 65520,
961 description => "Force MTU, for VirtIO only. Set to '1' to use the bridge MTU",
962 optional => 1,
963 },
964 };
965
966 my $netdesc = {
967 optional => 1,
968 type => 'string', format => $net_fmt,
969 description => "Specify network devices.",
970 };
971
972 PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
973
# Property-string schema for the cloud-init 'ipconfigN' options. Each gateway
# key declares (via 'requires') that its matching address key must be given.
my $ipconfig_fmt = {
    ip => {
        description => 'IPv4 address in CIDR format.',
        type => 'string',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        default => 'dhcp',
        optional => 1,
    },
    gw => {
        description => 'Default gateway for IPv4 traffic.',
        type => 'string',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        requires => 'ip',
        optional => 1,
    },
    ip6 => {
        description => 'IPv6 address in CIDR format.',
        type => 'string',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        default => 'dhcp',
        optional => 1,
    },
    gw6 => {
        description => 'Default gateway for IPv6 traffic.',
        type => 'string',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        requires => 'ip6',
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-qm-ipconfig', $ipconfig_fmt);
# Schema entry used for every cloud-init 'ipconfigN' config option; the value
# is a property string validated by the 'pve-qm-ipconfig' format.
my $ipconfigdesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-ipconfig',
    description => <<'EODESCR',
cloud-init: Specify IP addresses and gateways for the corresponding interface.

IP addresses use CIDR notation, gateways are optional but need an IP of the same type specified.

The special string 'dhcp' can be used for IP addresses to use DHCP, in which case no explicit
gateway should be provided.
For IPv6 the special string 'auto' can be used to use stateless autoconfiguration. This requires
cloud-init 19.4 or newer.

If cloud-init is enabled and neither an IPv4 nor an IPv6 address is specified, it defaults to using
dhcp on IPv4.
EODESCR
};
# The standard option must carry the ipconfig description; it used to be
# registered with the network device description ($netdesc) by mistake.
PVE::JSONSchema::register_standard_option("pve-qm-ipconfig", $ipconfigdesc);
1027
# One 'netN' option and one cloud-init 'ipconfigN' option per possible NIC.
for my $i (0 .. $MAX_NETS - 1) {
    $confdesc->{"net$i"} = $netdesc;
    $confdesc_cloudinit->{"ipconfig$i"} = $ipconfigdesc;
}

# Every cloud-init option is also a regular VM config option.
$confdesc->{$_} = $confdesc_cloudinit->{$_} for keys %$confdesc_cloudinit;
1036
# Format verifier for 'pve-cpuset'.
# Parses $set_text with PVE::CpuSet::parse_cpuset() and returns the set in its
# short string form. On a parse failure it returns nothing when $noerr is set
# and dies otherwise.
PVE::JSONSchema::register_format('pve-cpuset', \&pve_verify_cpuset);
sub pve_verify_cpuset {
    my ($set_text, $noerr) = @_;

    # only the member list is needed, the count is ignored
    my (undef, $members) = eval { PVE::CpuSet::parse_cpuset($set_text) };

    if ($@) {
        die "unable to parse cpuset option\n" if !$noerr;
        return;
    }

    my $cpuset = PVE::CpuSet->new($members);
    return $cpuset->short_string();
}
1050
# Format verifier for 'pve-volume-id-or-qm-path'.
# Accepts the special values 'none' and 'cdrom' as they are; anything else has
# to be a volume ID or an absolute path.
PVE::JSONSchema::register_format('pve-volume-id-or-qm-path', \&verify_volume_id_or_qm_path);
sub verify_volume_id_or_qm_path {
    my ($volid, $noerr) = @_;

    for my $special (qw(none cdrom)) {
        return $volid if $volid eq $special;
    }

    return verify_volume_id_or_absolute_path($volid, $noerr);
}
1059
# Format verifier for 'pve-volume-id-or-absolute-path'.
# Values starting with '/' are returned unchanged; everything else is checked
# against the 'pve-volume-id' format. On a failed check it returns nothing when
# $noerr is set and re-throws the format error otherwise.
PVE::JSONSchema::register_format('pve-volume-id-or-absolute-path', \&verify_volume_id_or_absolute_path);
sub verify_volume_id_or_absolute_path {
    my ($volid, $noerr) = @_;

    return $volid if $volid =~ m|^/|;

    my $checked = eval { PVE::JSONSchema::check_format('pve-volume-id', $volid, '') };
    if (my $err = $@) {
        die $err if !$noerr;
        return;
    }

    return $checked;
}
1073
# Property-string schema for the 'usbN' options: the host device (default key)
# plus an optional USB3 flag.
my $usb_fmt = {
    host => {
        default_key => 1,
        type => 'string', format => 'pve-qm-usb-device',
        format_description => 'HOSTUSBDEVICE|spice',
        description => <<EODESCR,
The Host USB device or port or the value 'spice'. HOSTUSBDEVICE syntax is:

'bus-port(.port)*' (decimal numbers) or
'vendor_id:product_id' (hexadecimal numbers) or
'spice'

You can use the 'lsusb -t' command to list existing usb devices.

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

The value 'spice' can be used to add a USB redirection device for spice.
EODESCR
    },
    usb3 => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether the given host option is a USB3 device or port."
            ." For modern guests (machine version >= 7.1 and ostype l26 and windows > 7), this flag"
            ." is irrelevant (all devices are plugged into a xhci controller).",
        default => 0,
    },
};

# Schema entry used for every 'usbN' config option; also exported as the
# standard option 'pve-qm-usb'.
my $usbdesc = {
    optional => 1,
    type => 'string', format => $usb_fmt,
    description => "Configure an USB device (n is 0 to 4, for machine version >= 7.1 and ostype"
        ." l26 or windows > 7, n can be up to 14).",
};
PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc);
1111
# Schema entry used for every 'serialN' config option: either a host device
# below /dev or the literal 'socket'.
my $serialdesc = {
    type => 'string',
    pattern => '(/dev/.+|socket)',
    optional => 1,
    description => "Create a serial device inside the VM (n is 0 to 3)",
    verbose_description => <<EODESCR,
Create a serial device inside the VM (n is 0 to 3), and pass through a
host serial device (i.e. /dev/ttyS0), or create a unix socket on the
host side (use 'qm terminal' to open a terminal connection).

NOTE: If you pass through a host serial device, it is no longer possible to migrate such machines -
use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1128
# Schema entry used for every 'parallelN' config option: a host parallel port
# device node.
my $paralleldesc = {
    type => 'string',
    pattern => '/dev/parport\d+|/dev/usb/lp\d+',
    optional => 1,
    description => "Map host parallel devices (n is 0 to 2).",
    verbose_description => <<EODESCR,
Map host parallel devices (n is 0 to 2).

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1143
# Register the indexed device options in the VM config schema.
$confdesc->{"parallel$_"} = $paralleldesc for 0 .. $MAX_PARALLEL_PORTS - 1;

$confdesc->{"serial$_"} = $serialdesc for 0 .. $MAX_SERIAL_PORTS - 1;

$confdesc->{"hostpci$_"} = $PVE::QemuServer::PCI::hostpcidesc
    for 0 .. $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES - 1;

# all drive options are described by the Drive module
{
    my $drivedesc = $PVE::QemuServer::Drive::drivedesc_hash;
    foreach my $drive_key (keys %$drivedesc) {
        $confdesc->{$drive_key} = $drivedesc->{$drive_key};
    }
}

$confdesc->{"usb$_"} = $usbdesc for 0 .. $MAX_USB_DEVICES - 1;
1163
# Property-string schema for the 'boot' option: the deprecated 'legacy'
# letter list (default key) and the explicit 'order' device list.
my $boot_fmt = {
    legacy => {
        type => 'string',
        pattern => '[acdn]{1,4}',
        format_description => "[acdn]{1,4}",
        description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n)."
            . " Deprecated, use 'order=' instead.",
        default_key => 1,
        optional => 1,

        # note: this is also the fallback if boot: is not given at all
        default => 'cdn',
    },
    order => {
        type => 'string',
        format => 'pve-qm-bootdev-list',
        format_description => "device[;device...]",
        optional => 1,
        description => <<EODESC,
The guest will attempt to boot from devices in the order they appear here.

Disks, optical drives and passed-through storage USB devices will be directly
booted from, NICs will load PXE, and PCIe devices will either behave like disks
(e.g. NVMe) or load an option ROM (e.g. RAID controller, hardware NIC).

Note that only devices in this list will be marked as bootable and thus loaded
by the guest firmware (BIOS/UEFI). If you require multiple disks for booting
(e.g. software-raid), you need to specify all of them here.

Overrides the deprecated 'legacy=[acdn]*' value when given.
EODESC
    },
};
PVE::JSONSchema::register_format('pve-qm-boot', $boot_fmt);
1198
# Format verifier for 'pve-qm-bootdev'.
# A boot device is either a valid drive name (except efidisk*/tpmstate*) or a
# 'netN', 'usbN' or 'hostpciN' key that exists in the config schema. Returns
# $dev when valid; otherwise returns nothing if $noerr is set, or dies.
PVE::JSONSchema::register_format('pve-qm-bootdev', \&verify_bootdev);
sub verify_bootdev {
    my ($dev, $noerr) = @_;

    # these are drive names, but never bootable
    my $special = $dev =~ m/^efidisk/ || $dev =~ m/^tpmstate/;
    return $dev if PVE::QemuServer::Drive::is_valid_drivename($dev) && !$special;

    for my $base (qw(net usb hostpci)) {
        next if $dev !~ m/^$base\d+$/;
        next if !$confdesc->{$dev};
        return $dev;
    }

    die "invalid boot device '$dev'\n" if !$noerr;
    return;
}
1220
# Render a list of boot devices (array reference) as a 'boot' property string
# of the form 'order=dev1;dev2;...'. An empty list yields an empty string.
sub print_bootorder {
    my ($devs) = @_;

    return "" if !scalar(@$devs);

    my $order = join(';', @$devs);
    return PVE::JSONSchema::print_property_string({ order => $order }, $boot_fmt);
}
1227
# cached result of the KVM_GET_API_VERSION ioctl (0 = not queried yet)
my $kvm_api_version = 0;

# Return the KVM API version reported by /dev/kvm. A true result is cached for
# later calls. Returns nothing if /dev/kvm cannot be opened.
sub kvm_version {
    if (!$kvm_api_version) {
        open my $fh, '<', '/dev/kvm' or return;

        # 0xae00 => KVM_GET_API_VERSION
        $kvm_api_version = ioctl($fh, 0xae00, 0);
        close($fh);
    }

    return $kvm_api_version;
}
1241
# per-binary cache: detected version string and the binary's mtime at that time
my $kvm_user_version = {};
my $kvm_mtime = {};

# Return the version of the QEMU binary $binary as printed by '--version',
# or 'unknown' if the output could not be parsed.
#
# $binary defaults to the command for the host architecture. The result is
# cached per binary and re-read whenever the binary's mtime changes.
# Dies if the binary cannot be stat'ed.
sub kvm_user_version {
    my ($binary) = @_;

    $binary //= get_command_for_arch(get_host_arch()); # get the native arch by default

    # stat() returns undef on failure; fail with a useful message instead of
    # crashing on the method call below and leaving a stale cache entry behind
    my $st = stat($binary)
        or die "unable to stat QEMU binary '$binary' - $!\n";
    my $mtime = $st->mtime;

    my $cachedmtime = $kvm_mtime->{$binary} // -1;
    return $kvm_user_version->{$binary} if $kvm_user_version->{$binary} &&
        $cachedmtime == $mtime;

    $kvm_user_version->{$binary} = 'unknown';
    $kvm_mtime->{$binary} = $mtime;

    my $code = sub {
        my $line = shift;
        if ($line =~ m/^QEMU( PC)? emulator version (\d+\.\d+(\.\d+)?)(\.\d+)?[,\s]/) {
            $kvm_user_version->{$binary} = $2;
        }
    };

    # a failing command only produces a warning, the result stays 'unknown'
    eval { run_command([$binary, '--version'], outfunc => $code); };
    warn $@ if $@;

    return $kvm_user_version->{$binary};
}
# Thin wrapper around PVE::QemuServer::Machine::extract_version() that falls
# back to the installed QEMU version when no $version is passed.
my sub extract_version {
    my ($machine_type, $version) = @_;

    $version //= kvm_user_version();

    return PVE::QemuServer::Machine::extract_version($machine_type, $version);
}
1276
# True if the character device /dev/vhost-net exists on this host.
sub kernel_has_vhost_net {
    my $is_char_device = -c '/dev/vhost-net';
    return $is_char_device;
}
1280
# True if $key is a known option in the VM config schema.
sub option_exists {
    my ($key) = @_;

    my $schema_entry = $confdesc->{$key};
    return defined($schema_entry);
}
1285
# cached path of the host CD-ROM device, set on the first successful lookup
my $cdrom_path;

# Return the first of /dev/cdrom, /dev/cdrom1, /dev/cdrom2 that is a symlink,
# and cache it for later calls. Returns undef if none of them exists.
sub get_cdrom_path {
    return $cdrom_path if $cdrom_path;

    for my $candidate (qw(/dev/cdrom /dev/cdrom1 /dev/cdrom2)) {
        return $cdrom_path = $candidate if -l $candidate;
    }

    # Explicit result for the "nothing found" case; previously the sub fell
    # off its end and leaked the value of the last file test. A single undef
    # (not a bare return) keeps the one-value result callers got before.
    return undef;
}
1295
# Resolve the file of a CD-ROM drive to a path:
#   'cdrom'       -> the host CD-ROM device (see get_cdrom_path)
#   'none'        -> empty string
#   absolute path -> returned unchanged
#   anything else -> treated as volume ID and resolved via PVE::Storage::path
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';
    return '' if $cdrom eq 'none';
    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path($storecfg, $cdrom);
}
1309
# Try to convert old style file names to volume IDs.
# 'none', 'cdrom', /dev/ paths and values that already look like
# 'storage:volume' are returned unchanged. A plain file name is mapped to
# 'local:iso/<file>' for cdrom media and to 'local:<vmid>/<file>' otherwise.
# Any other value containing a '/' cannot be converted and yields nothing.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    return $file if $file eq 'none' || $file eq 'cdrom';
    return $file if $file =~ m|^/dev/.+|;
    return $file if $file =~ m/^([^:]+):(.+)$/;

    return if $file =~ m|/|;

    my $is_cdrom = $media && $media eq 'cdrom';
    return $is_cdrom ? "local:iso/$file" : "local:$vmid/$file";
}
1328
# Check that the storage content type $vtype fits the drive's $media setting
# ('disk' expects 'images', 'cdrom' expects 'iso'). Does nothing when $media
# is not set. Raises a parameter exception for $opt on a mismatch and dies
# with an internal error for any other $media value.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my $expected_type = {
        disk => 'images',
        cdrom => 'iso',
    };
    my $etype = $expected_type->{$media};
    die "internal error" if !defined($etype);

    raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" })
        if $vtype ne $etype;

    return;
}
1347
# Normalize $drive->{file} and $drive->{media} in place.
# A file that is not 'cdrom'/'none', not a /dev/ path, not already in
# 'storage:volume' form and not purely numeric is treated as a filesystem path
# and converted to a volume ID. Raises a parameter exception for $opt if no
# storage matches the path or if the content type does not fit the media.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    # try to convert filesystem paths to volume IDs
    my $file = $drive->{file};
    my $is_fs_path = $file !~ m/^(cdrom|none)$/
        && $file !~ m|^/dev/.+|
        && $file !~ m/^([^:]+):(.+)$/
        && $file !~ m/^\d+$/;

    if ($is_fs_path) {
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $file);
        if (!$vtype) {
            raise_param_exc({ $opt => "unable to associate path '$drive->{file}' to any storage"});
        }
        # ISO images are always attached as CD-ROM unless stated otherwise
        $drive->{media} = 'cdrom' if !$drive->{media} && $vtype eq 'iso';
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    $drive->{media} = 'cdrom' if !$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/;
}
1367
# Parse the 'hotplug' option into a hash reference { feature => 1, ... }.
# '0' disables everything (empty hash), '1' selects the schema default list.
# Dies on an unknown feature name.
sub parse_hotplug_features {
    my ($data) = @_;

    return {} if $data eq '0';

    $data = $confdesc->{hotplug}->{default} if $data eq '1';

    my $enabled = {};
    for my $feature (PVE::Tools::split_list($data)) {
        my ($name) = $feature =~ m/^(network|disk|cpu|memory|usb|cloudinit)$/
            or die "invalid hotplug feature '$feature'\n";
        $enabled->{$name} = 1;
    }

    return $enabled;
}
1386
# Format verifier for 'pve-hotplug-features'.
# Returns $value if it can be parsed by parse_hotplug_features(). On invalid
# input it returns nothing when $noerr is set and dies otherwise.
PVE::JSONSchema::register_format('pve-hotplug-features', \&pve_verify_hotplug_features);
sub pve_verify_hotplug_features {
    my ($value, $noerr) = @_;

    # The parser signals invalid input by dying, so it has to run inside an
    # eval for $noerr to have any effect.
    my $features = eval { parse_hotplug_features($value) };
    my $err = $@;

    return $value if !$err && $features;

    return if $noerr;

    # keep the parser's specific message when there is one
    die $err if $err;
    die "unable to parse hotplug option\n";
}
1397
# Send a SCSI INQUIRY command to the device behind file handle $fh using the
# Linux SCSI generic (sg) ioctl interface.
#
# Returns a hash reference with the keys type, removable, vendor, product and
# revision decoded from the INQUIRY response. On any failure it dies, or just
# returns nothing if $noerr is set.
sub scsi_inquiry {
    my($fh, $noerr) = @_;

    my $SG_IO = 0x2285;
    my $SG_GET_VERSION_NUM = 0x2282;

    my $versionbuf = "\x00" x 8;
    my $ret = ioctl($fh, $SG_GET_VERSION_NUM, $versionbuf);
    if (!$ret) {
        die "scsi ioctl SG_GET_VERSION_NUM failed - $!\n" if !$noerr;
        return;
    }
    my $version = unpack("I", $versionbuf);
    if ($version < 30000) {
        die "scsi generic interface too old\n" if !$noerr;
        return;
    }

    my $buf = "\x00" x 36;
    my $sensebuf = "\x00" x 8;
    # 6 byte command block: opcode 0x12, allocation length 36 (size of $buf)
    my $cmd = pack("C x3 C x1", 0x12, 36);

    # see /usr/include/scsi/sg.h
    my $sg_io_hdr_t = "i i C C s I P P P I I i P C C C C S S i I I";

    my $packet = pack(
        $sg_io_hdr_t, ord('S'), -3, length($cmd), length($sensebuf), 0, length($buf), $buf, $cmd, $sensebuf, 6000
    );

    $ret = ioctl($fh, $SG_IO, $packet);
    if (!$ret) {
        die "scsi ioctl SG_IO failed - $!\n" if !$noerr;
        return;
    }

    my @res = unpack($sg_io_hdr_t, $packet);
    # fields 17 and 18 of the template are the two 'S' values following the
    # four status bytes (host_status and driver_status in sg.h)
    if ($res[17] || $res[18]) {
        die "scsi ioctl SG_IO status error - $!\n" if !$noerr;
        return;
    }

    my $res = {};
    $res->@{qw(type removable vendor product revision)} = unpack("C C x6 A8 A16 A4", $buf);

    # only the top bit of the second byte and the low 5 bits of the first are used
    $res->{removable} = $res->{removable} & 128 ? 1 : 0;
    $res->{type} &= 0x1F;

    return $res;
}
1447
# Check whether $path is a SCSI device by opening it read-write and sending an
# INQUIRY. Returns the hash reference from scsi_inquiry() on success and
# nothing if the path cannot be opened or the inquiry fails.
sub path_is_scsi {
    my ($path) = @_;

    # Pass the mode separately so the path is never interpreted as part of a
    # 2-arg open specification (which would e.g. strip surrounding whitespace).
    my $fh = IO::File->new($path, '+<') || return;
    my $res = scsi_inquiry($fh, 1);
    close($fh);

    return $res;
}
1457
# Return the QEMU device string for the USB tablet. It is attached to the
# 'ehci' bus on q35 machines and on aarch64, and to 'uhci' otherwise.
sub print_tabletdevice_full {
    my ($conf, $arch) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # we use uhci for old VMs because tablet driver was buggy in older qemu
    my $usbbus = ($q35 || $arch eq 'aarch64') ? 'ehci' : 'uhci';

    return "usb-tablet,id=tablet,bus=$usbbus.0,port=1";
}
1473
# Return the QEMU device string for a USB keyboard. Only aarch64 guests get
# one; for every other architecture nothing is returned.
sub print_keyboarddevice_full {
    my ($conf, $arch) = @_;

    if ($arch eq 'aarch64') {
        return "usb-kbd,id=keyboard,bus=ehci.0,port=2";
    }

    return;
}
1481
# Build the drive ID (interface name followed by the index, e.g. 'scsi0').
my sub get_drive_id {
    my ($drive) = @_;

    my ($interface, $index) = $drive->@{qw(interface index)};
    return "${interface}${index}";
}
1486
# Build the QEMU '-device' argument for a drive.
#
# Supported interfaces are virtio, scsi, ide and sata; 'usb' is not
# implemented and any other value is rejected (both die). The returned string
# references the backing drive as 'drive-<id>' and may carry the optional
# iothread, rotation_rate, wwn, model, bootindex and serial properties.
sub print_drivedevice_full {
    my ($storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type) = @_;

    my $device = '';

    my $drive_id = get_drive_id($drive);
    if ($drive->{interface} eq 'virtio') {
        my $pciaddr = print_pci_addr("$drive_id", $bridges, $arch, $machine_type);
        $device = "virtio-blk-pci,drive=drive-$drive_id,id=${drive_id}${pciaddr}";
        $device .= ",iothread=iothread-$drive_id" if $drive->{iothread};
    } elsif ($drive->{interface} eq 'scsi') {

        my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = 'hd';
        my $path = '';
        if (drive_is_cdrom($drive)) {
            $devicetype = 'cd';
        } else {
            if ($drive->{file} =~ m|^/|) {
                $path = $drive->{file};
                # pick the device type from the INQUIRY result for real SCSI devices
                if (my $info = path_is_scsi($path)) {
                    if ($info->{type} == 0 && $drive->{scsiblock}) {
                        $devicetype = 'block';
                    } elsif ($info->{type} == 1) { # tape
                        $devicetype = 'generic';
                    }
                }
            } else {
                $path = PVE::Storage::path($storecfg, $drive->{file});
            }

            # for compatibility only, we prefer scsi-hd (#2408, #2355, #2380)
            my $version = extract_version($machine_type, kvm_user_version());
            if ($path =~ m/^iscsi\:\/\// &&
                !min_version($version, 4, 1)) {
                $devicetype = 'generic';
            }
        }

        # lsi (also the default) and pvscsi address drives by scsi-id, all
        # other controllers by lun
        if (!$conf->{scsihw} || $conf->{scsihw} =~ m/^lsi/ || $conf->{scsihw} eq 'pvscsi') {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,scsi-id=$unit";
        } else {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,channel=0,scsi-id=0"
                .",lun=$drive->{index}";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($drive->{ssd} && ($devicetype eq 'block' || $devicetype eq 'hd')) {
            $device .= ",rotation_rate=1";
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};

    } elsif ($drive->{interface} eq 'ide' || $drive->{interface} eq 'sata') {
        my $maxdev = ($drive->{interface} eq 'sata') ? $PVE::QemuServer::Drive::MAX_SATA_DISKS : 2;
        my $controller = int($drive->{index} / $maxdev);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = ($drive->{media} && $drive->{media} eq 'cdrom') ? "cd" : "hd";

        $device = "ide-$devicetype";
        if ($drive->{interface} eq 'ide') {
            $device .= ",bus=ide.$controller,unit=$unit";
        } else {
            $device .= ",bus=ahci$controller.$unit";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($devicetype eq 'hd') {
            if (my $model = $drive->{model}) {
                $model = URI::Escape::uri_unescape($model);
                $device .= ",model=$model";
            }
            if ($drive->{ssd}) {
                $device .= ",rotation_rate=1";
            }
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};
    } elsif ($drive->{interface} eq 'usb') {
        die "implement me";
        # -device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0
    } else {
        die "unsupported interface type";
    }

    $device .= ",bootindex=$drive->{bootindex}" if $drive->{bootindex};

    if (my $serial = $drive->{serial}) {
        $serial = URI::Escape::uri_unescape($serial);
        $device .= ",serial=$serial";
    }

    return $device;
}
1582
# Read the iSCSI initiator name from /etc/iscsi/initiatorname.iscsi.
# Returns the value of the first 'InitiatorName=' line, undef if the file has
# no such line, and nothing if the file cannot be opened.
sub get_initiator_name {
    my $fh = IO::File->new('/etc/iscsi/initiatorname.iscsi') || return;

    my $initiator;
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\s*InitiatorName\s*=\s*([\.\-:\w]+)/) {
            $initiator = $1;
            last;
        }
    }
    $fh->close();

    return $initiator;
}
1596
# Build the QEMU '-drive' argument for a drive.
#
# $pbs_name, if set, is the node name of a Proxmox Backup Server block device
# that backs the drive through an 'alloc-track' node; this is not allowed for
# CD-ROM drives. $io_uring tells whether io_uring may be chosen as the default
# aio mode. Returns a string of the form
# '[file=<path>,]if=none,id=drive-<id><options>'.
sub print_drive_commandline_full {
    my ($storecfg, $vmid, $drive, $pbs_name, $io_uring) = @_;

    my $path;
    my $volid = $drive->{file};
    my $format = $drive->{format};
    my $drive_id = get_drive_id($drive);

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
    my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;

    if (drive_is_cdrom($drive)) {
        $path = get_iso_path($storecfg, $vmid, $volid);
        die "$drive_id: cannot back cdrom drive with PBS snapshot\n" if $pbs_name;
    } else {
        if ($storeid) {
            $path = PVE::Storage::path($storecfg, $volid);
            $format //= qemu_img_format($scfg, $volname);
        } else {
            $path = $volid;
            $format //= "raw";
        }
    }

    # the path can be undefined for a CD-ROM drive without a host device
    my $is_rbd = defined($path) && $path =~ m/^rbd:/;

    my $opts = '';
    my @qemu_drive_options = qw(heads secs cyls trans media cache rerror werror aio discard);
    foreach my $o (@qemu_drive_options) {
        $opts .= ",$o=$drive->{$o}" if defined($drive->{$o});
    }

    # snapshot only accepts on|off
    if (defined($drive->{snapshot})) {
        my $v = $drive->{snapshot} ? 'on' : 'off';
        $opts .= ",snapshot=$v";
    }

    if (defined($drive->{ro})) { # ro maps to QEMUs `readonly`, which accepts `on` or `off` only
        $opts .= ",readonly=" . ($drive->{ro} ? 'on' : 'off');
    }

    # map our (total, read, write) limits to QEMU's throttling.* options;
    # mbps values are converted to bytes per second
    foreach my $type (['', '-total'], [_rd => '-read'], [_wr => '-write']) {
        my ($dir, $qmpname) = @$type;
        if (my $v = $drive->{"mbps$dir"}) {
            $opts .= ",throttling.bps$qmpname=".int($v*1024*1024);
        }
        if (my $v = $drive->{"mbps${dir}_max"}) {
            $opts .= ",throttling.bps$qmpname-max=".int($v*1024*1024);
        }
        if (my $v = $drive->{"bps${dir}_max_length"}) {
            $opts .= ",throttling.bps$qmpname-max-length=$v";
        }
        if (my $v = $drive->{"iops${dir}"}) {
            $opts .= ",throttling.iops$qmpname=$v";
        }
        if (my $v = $drive->{"iops${dir}_max"}) {
            $opts .= ",throttling.iops$qmpname-max=$v";
        }
        if (my $v = $drive->{"iops${dir}_max_length"}) {
            $opts .= ",throttling.iops$qmpname-max-length=$v";
        }
    }

    if ($pbs_name) {
        $format = "rbd" if $is_rbd;
        die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n"
            if !$format;
        $opts .= ",format=alloc-track,file.driver=$format";
    } elsif ($format) {
        $opts .= ",format=$format";
    }

    my $cache_direct = 0;

    if (my $cache = $drive->{cache}) {
        $cache_direct = $cache =~ /^(?:off|none|directsync)$/;
    } elsif (!drive_is_cdrom($drive) && !($scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow})) {
        $opts .= ",cache=none";
        $cache_direct = 1;
    }

    # io_uring with cache mode writeback or writethrough on krbd will hang...
    my $rbd_no_io_uring = $scfg && $scfg->{type} eq 'rbd' && $scfg->{krbd} && !$cache_direct;

    # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
    # sometimes, just plain disable...
    my $lvm_no_io_uring = $scfg && $scfg->{type} eq 'lvm';

    # io_uring causes problems when used with CIFS since kernel 5.15
    # Some discussion: https://www.spinics.net/lists/linux-cifs/msg26734.html
    my $cifs_no_io_uring = $scfg && $scfg->{type} eq 'cifs';

    # only choose a default when the user did not configure aio explicitly
    if (!$drive->{aio}) {
        if ($io_uring && !$rbd_no_io_uring && !$lvm_no_io_uring && !$cifs_no_io_uring) {
            # io_uring supports all cache modes
            $opts .= ",aio=io_uring";
        } else {
            # aio native works only with O_DIRECT
            if($cache_direct) {
                $opts .= ",aio=native";
            } else {
                $opts .= ",aio=threads";
            }
        }
    }

    if (!drive_is_cdrom($drive)) {
        my $detectzeroes;
        if (defined($drive->{detect_zeroes}) && !$drive->{detect_zeroes}) {
            $detectzeroes = 'off';
        } elsif ($drive->{discard}) {
            $detectzeroes = $drive->{discard} eq 'on' ? 'unmap' : 'on';
        } else {
            # This used to be our default with discard not being specified:
            $detectzeroes = 'on';
        }

        # note: 'detect-zeroes' works per blockdev and we want it to persist
        # after the alloc-track is removed, so put it on 'file' directly
        my $dz_param = $pbs_name ? "file.detect-zeroes" : "detect-zeroes";
        $opts .= ",$dz_param=$detectzeroes" if $detectzeroes;
    }

    if ($pbs_name) {
        $opts .= ",backing=$pbs_name";
        $opts .= ",auto-remove=on";
    }

    my $file_param = "file";
    if ($pbs_name) {
        # non-rbd drivers require the underlying file to be a separate block
        # node, so add a second .file indirection
        $file_param .= ".file" if !$is_rbd;
        $file_param .= ".filename";
    }
    my $pathinfo = $path ? "$file_param=$path," : '';

    return "${pathinfo}if=none,id=drive-${drive_id}${opts}";
}
1738
# Build the QEMU '-blockdev' argument for a read-only Proxmox Backup Server
# block node named $pbs_name. 'namespace' and 'keyfile' are only added when
# set in $pbs_conf.
sub print_pbs_blockdev {
    my ($pbs_conf, $pbs_name) = @_;

    my @props = (
        "driver=pbs",
        "node-name=$pbs_name",
        "read-only=on",
        "repository=$pbs_conf->{repository}",
    );
    push @props, "namespace=$pbs_conf->{namespace}" if $pbs_conf->{namespace};
    push @props, "snapshot=$pbs_conf->{snapshot}";
    push @props, "archive=$pbs_conf->{archive}";
    push @props, "keyfile=$pbs_conf->{keyfile}" if $pbs_conf->{keyfile};

    return join(',', @props);
}
1749
# Build the QEMU '-device' argument for the network device $netid.
# The 'virtio' model is mapped to 'virtio-net-pci'; multiqueue, bootindex,
# host_mtu (VirtIO on a bridge only) and a legacy PXE ROM file are added when
# applicable. Dies if the configured MTU is below 576 or above the bridge MTU.
sub print_netdevice_full {
    my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type) = @_;

    my $is_virtio = $net->{model} eq 'virtio';
    my $device = $is_virtio ? 'virtio-net-pci' : $net->{model};

    my $pciaddr = print_pci_addr("$netid", $bridges, $arch, $machine_type);
    my $tmpstr = "$device,mac=$net->{macaddr},netdev=$netid$pciaddr,id=$netid";

    if ($is_virtio && $net->{queues} && $net->{queues} > 1) {
        # Consider we have N queues, the number of vectors needed is 2 * N + 2, i.e., one per in
        # and out of each queue plus one config interrupt and control vector queue
        my $vectors = $net->{queues} * 2 + 2;
        $tmpstr .= ",vectors=$vectors,mq=on";
    }

    if ($net->{bootindex}) {
        $tmpstr .= ",bootindex=$net->{bootindex}";
    }

    if (my $mtu = $net->{mtu}) {
        if (!$is_virtio || !$net->{bridge}) {
            warn "WARN: netdev $netid: ignoring MTU '$mtu', not using VirtIO or no bridge configured.\n";
        } else {
            my $bridge_mtu = PVE::Network::read_bridge_mtu($net->{bridge});
            if ($mtu == 1) {
                # special value: inherit the MTU of the bridge
                $mtu = $bridge_mtu;
            } elsif ($mtu < 576) {
                die "netdev $netid: MTU '$mtu' is smaller than the IP minimum MTU '576'\n";
            } elsif ($mtu > $bridge_mtu) {
                die "netdev $netid: MTU '$mtu' is bigger than the bridge MTU '$bridge_mtu'\n";
            }
            $tmpstr .= ",host_mtu=$mtu";
        }
    }

    if ($use_old_bios_files) {
        my $romfile_for = {
            'virtio-net-pci' => 'pxe-virtio.rom',
            'e1000' => 'pxe-e1000.rom',
            'e1000e' => 'pxe-e1000e.rom',
            'ne2k' => 'pxe-ne2k_pci.rom',
            'pcnet' => 'pxe-pcnet.rom',
            'rtl8139' => 'pxe-rtl8139.rom',
        };
        my $romfile = $romfile_for->{$device};
        $tmpstr .= ",romfile=$romfile" if $romfile;
    }

    return $tmpstr;
}
1804
# Build the QEMU '-netdev' argument for the network device $netid.
#
# With a bridge configured a tap device named 'tap<vmid>i<N>' is used together
# with the pve-bridge (or, if $hotplug is set, pve-bridge-hotplug) script;
# otherwise user-mode networking is used. Dies if $netid is not of the form
# 'netN' with N below $MAX_NETS, or if the interface name gets too long.
sub print_netdev_full {
    my ($vmid, $conf, $arch, $net, $netid, $hotplug) = @_;

    # Reject IDs that do not match at all; they used to slip through the
    # numeric range check as an empty index and produced 'tap<vmid>i'.
    my ($i) = $netid =~ m/^net(\d+)$/
        or die "got strange net id '$netid'\n";
    $i = int($i);

    die "got strange net id '$netid'\n" if $i >= ${MAX_NETS};

    my $ifname = "tap${vmid}i$i";

    # kvm uses TUNSETIFF ioctl, and that limits ifname length
    die "interface name '$ifname' is too long (max 15 character)\n"
        if length($ifname) >= 16;

    my $vhostparam = '';
    if (is_native($arch)) {
        $vhostparam = ',vhost=on' if kernel_has_vhost_net() && $net->{model} eq 'virtio';
    }

    my $vmname = $conf->{name} || "vm$vmid";

    my $netdev = "";
    my $script = $hotplug ? "pve-bridge-hotplug" : "pve-bridge";

    if ($net->{bridge}) {
        $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/$script"
            .",downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam";
    } else {
        $netdev = "type=user,id=$netid,hostname=$vmname";
    }

    $netdev .= ",queues=$net->{queues}" if ($net->{queues} && $net->{model} eq 'virtio');

    return $netdev;
}
1842
# maps the configured display type to the QEMU device name
my $vga_map = {
    'cirrus' => 'cirrus-vga',
    'std' => 'VGA',
    'vmware' => 'vmware-svga',
    'virtio' => 'virtio-vga',
    'virtio-gl' => 'virtio-vga-gl',
};

# Build the QEMU '-device' argument for a display adapter.
#
# $vga is the parsed 'vga' option (type, memory), $id the index of an
# additional display (undefined for the first one) and $qxlnum is set when
# QXL/SPICE displays are used. Dies if the type has no device mapping, or if
# 'virtio-gl' is requested without the needed libraries or a DRM render node.
sub print_vga_device {
    my ($conf, $vga, $arch, $machine_version, $machine, $id, $qxlnum, $bridges) = @_;

    my $type = $vga_map->{$vga->{type}};
    if ($arch eq 'aarch64' && defined($type) && $type eq 'virtio-vga') {
        $type = 'virtio-gpu';
    }
    my $vgamem_mb = $vga->{memory};

    my $max_outputs = '';
    if ($qxlnum) {
        $type = $id ? 'qxl' : 'qxl-vga';

        # The alternation is grouped so that both anchors apply to both
        # branches; it was '^(?:l\d\d)|(?:other)$' before.
        if (!$conf->{ostype} || $conf->{ostype} =~ m/^(?:l\d\d|other)$/) {
            # set max outputs so linux can have up to 4 qxl displays with one device
            if (min_version($machine_version, 4, 1)) {
                $max_outputs = ",max_outputs=4";
            }
        }
    }

    die "no devicetype for $vga->{type}\n" if !$type;

    my $memory = "";
    if ($vgamem_mb) {
        if ($vga->{type} =~ /^virtio/) {
            my $bytes = PVE::Tools::convert_size($vgamem_mb, "mb" => "b");
            $memory = ",max_hostmem=$bytes";
        } elsif ($qxlnum) {
            # from https://www.spice-space.org/multiple-monitors.html
            $memory = ",vgamem_mb=$vga->{memory}";
            my $ram = $vgamem_mb * 4;
            my $vram = $vgamem_mb * 2;
            $memory .= ",ram_size_mb=$ram,vram_size_mb=$vram";
        } else {
            $memory = ",vgamem_mb=$vga->{memory}";
        }
    } elsif ($qxlnum && $id) {
        $memory = ",ram_size=67108864,vram_size=33554432";
    }

    my $edidoff = "";
    if ($type eq 'VGA' && windows_version($conf->{ostype})) {
        $edidoff=",edid=off" if (!defined($conf->{bios}) || $conf->{bios} ne 'ovmf');
    }

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $vgaid = "vga" . ($id // '');
    my $pciaddr;
    if ($q35 && $vgaid eq 'vga') {
        # the first display uses pcie.0 bus on q35 machines
        $pciaddr = print_pcie_addr($vgaid, $bridges, $arch, $machine);
    } else {
        $pciaddr = print_pci_addr($vgaid, $bridges, $arch, $machine);
    }

    if ($vga->{type} eq 'virtio-gl') {
        my $base = '/usr/lib/x86_64-linux-gnu/lib';
        die "missing libraries for '$vga->{type}' detected! Please install 'libgl1' and 'libegl1'\n"
            if !-e "${base}EGL.so.1" || !-e "${base}GL.so.1";

        die "no DRM render node detected (/dev/dri/renderD*), no GPU? - needed for '$vga->{type}' display\n"
            if !PVE::Tools::dir_glob_regex('/dev/dri/', "renderD.*");
    }

    return "$type,id=${vgaid}${memory}${max_outputs}${pciaddr}${edidoff}";
}
1918
# Parse a ';' separated list of numbers and ranges ('0-3;5') into an array
# reference of [ start, end ] pairs, where end is undef for a single number.
# Dies on a malformed part or when a range ends before it starts.
sub parse_number_sets {
    my ($set) = @_;

    my @ranges;
    for my $part (split(/;/, $set)) {
        my ($from, $to) = $part =~ /^\s*(\d+)(?:-(\d+))?\s*$/
            or die "invalid range: $part\n";

        die "invalid range: $part ($to < $from)\n" if defined($to) && $to < $from;

        push @ranges, [ $from, $to ];
    }

    return \@ranges;
}
1932
# Parse a 'numaN' property string. The 'cpus' and 'hostnodes' values, if
# present, are expanded into range lists by parse_number_sets().
sub parse_numa {
    my ($data) = @_;

    my $res = parse_property_string($numa_fmt, $data);

    for my $key (qw(cpus hostnodes)) {
        next if !defined($res->{$key});
        $res->{$key} = parse_number_sets($res->{$key});
    }

    return $res;
}
1941
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
#
# Parse a 'netN' property string. A parse error is only warned about and
# nothing is returned. If no MAC address is given, a random one is generated
# using the 'mac_prefix' setting from datacenter.cfg.
sub parse_net {
    my ($data) = @_;

    my $net = eval { parse_property_string($net_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    $net->{macaddr} //= do {
        my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
        PVE::Tools::random_ether_addr($dc->{mac_prefix});
    };

    return $net;
}
1957
# ipconfigX ip=cidr,gw=ip,ip6=cidr,gw6=ip
#
# Parse an 'ipconfigN' property string. Parse errors and inconsistent settings
# (a gateway without an address, or a gateway combined with dhcp/auto) are
# warned about and nothing is returned. If neither an IPv4 nor an IPv6 address
# is given, { ip => 'dhcp', ip6 => 'dhcp' } is returned.
sub parse_ipconfig {
    my ($data) = @_;

    my $res = eval { parse_property_string($ipconfig_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    my ($ip, $gw, $ip6, $gw6) = $res->@{qw(ip gw ip6 gw6)};

    if ($gw && !$ip) {
        warn 'gateway specified without specifying an IP address';
        return;
    } elsif ($gw6 && !$ip6) {
        warn 'IPv6 gateway specified without specifying an IPv6 address';
        return;
    } elsif ($gw && $ip eq 'dhcp') {
        warn 'gateway specified together with DHCP';
        return;
    } elsif ($gw6 && $ip6 !~ /^$IPV6RE/) {
        # gw6 + auto/dhcp
        warn "IPv6 gateway specified together with $res->{ip6} address";
        return;
    }

    return { ip => 'dhcp', ip6 => 'dhcp' } if !$ip && !$ip6;

    return $res;
}
1992
# Render a parsed network device hash as a 'netN' property string.
sub print_net {
    my ($net) = @_;

    my $property_string = PVE::JSONSchema::print_property_string($net, $net_fmt);
    return $property_string;
}
1998
# Re-write every 'netN' entry of $settings in place by parsing and printing it
# again, so that entries without a MAC address get the random one assigned by
# parse_net(). Entries that fail to parse are left untouched.
sub add_random_macs {
    my ($settings) = @_;

    my @net_opts = grep { $_ =~ m/^net(\d+)$/ } keys %$settings;
    for my $opt (@net_opts) {
        my $net = parse_net($settings->{$opt});
        $settings->{$opt} = print_net($net) if $net;
    }
}
2009
# Return 1 if the volume $volid is owned by VM $vmid according to the storage
# layer, nothing otherwise. Absolute paths are never owned, and errors while
# resolving the volume count as "not owned".
sub vm_is_volid_owner {
    my ($storecfg, $vmid, $volid) = @_;

    return if $volid =~ m|^/|;

    # only the owner is of interest, errors are ignored on purpose
    my (undef, $owner) = eval { PVE::Storage::path($storecfg, $volid) };

    return 1 if $owner && ($owner == $vmid);
    return;
}
2023
# Handle a drive that is being removed from the VM config:
#   - cloudinit drives are freed right away (a failure only warns) and the 'cloudinit'
#     entry of $conf is dropped
#   - CD-ROM drives are ignored
#   - any other volume owned by $vmid is registered as unused volume in $conf
sub vmconfig_register_unused_drive {
    my ($storecfg, $vmid, $conf, $drive) = @_;

    if (drive_is_cloudinit($drive)) {
        eval { PVE::Storage::vdisk_free($storecfg, $drive->{file}) };
        warn $@ if $@;
        delete $conf->{cloudinit};
    } elsif (!drive_is_cdrom($drive)) {
        my $volid = $drive->{file};
        PVE::QemuConfig->add_unused_volume($conf, $volid, $vmid)
            if vm_is_volid_owner($storecfg, $vmid, $volid);
    }
}
2038
# smbios: [manufacturer=str][,product=str][,version=str][,serial=str][,uuid=uuid][,sku=str][,family=str][,base64=bool]
#
# Property-string schema for the 'smbios1' option. All free-text fields share one schema
# (a string restricted to the base64 alphabet), so they are produced by a small local builder.
my $smbios1_fmt = do {
    my $base64_string = sub {
        my ($description) = @_;
        return {
            type => 'string',
            pattern => '[A-Za-z0-9+\/]+={0,2}',
            format_description => 'Base64 encoded string',
            description => $description,
            optional => 1,
        };
    };

    my $fmt = {
        uuid => {
            type => 'string',
            pattern => '[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}',
            format_description => 'UUID',
            description => "Set SMBIOS1 UUID.",
            optional => 1,
        },
        version => $base64_string->("Set SMBIOS1 version."),
        serial => $base64_string->("Set SMBIOS1 serial number."),
        manufacturer => $base64_string->("Set SMBIOS1 manufacturer."),
        product => $base64_string->("Set SMBIOS1 product ID."),
        sku => $base64_string->("Set SMBIOS1 SKU string."),
        family => $base64_string->("Set SMBIOS1 family string."),
        base64 => {
            type => 'boolean',
            description => 'Flag to indicate that the SMBIOS values are base64 encoded',
            optional => 1,
        },
    };

    $fmt;
};
2096
# Parse an 'smbios1' property string against $smbios1_fmt. A parse error is only emitted as
# a warning; in that case the returned value is undefined.
sub parse_smbios1 {
    my ($data) = @_;

    my $smbios1 = eval { parse_property_string($smbios1_fmt, $data) };
    if (my $err = $@) {
        warn $err;
    }

    return $smbios1;
}
2104
# Serialize a parsed smbios1 hash back into its property string, using $smbios1_fmt.
sub print_smbios1 {
    my ($smbios1) = @_;

    return PVE::JSONSchema::print_property_string($smbios1, $smbios1_fmt);
}

# make the schema available under the format name 'pve-qm-smbios1'
PVE::JSONSchema::register_format('pve-qm-smbios1', $smbios1_fmt);
2111
# Parse a 'watchdog' property string against $watchdog_fmt. Returns nothing for an
# empty/false value. A parse error is only emitted as a warning; the result is then undefined.
sub parse_watchdog {
    my ($value) = @_;

    return if !$value;

    my $watchdog = eval { parse_property_string($watchdog_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $watchdog;
}
2121
# Parse the 'agent' option of a VM config against $agent_fmt.
#
# Always returns a hash reference: the parsed properties if the agent is enabled, otherwise an
# empty hash (agent option missing, agent disabled, or - after a warning - not parsable).
sub parse_guest_agent {
    my ($conf) = @_;

    my $agent_str = $conf->{agent};
    return {} if !defined($agent_str);

    my $agent = eval { parse_property_string($agent_fmt, $agent_str) };
    warn $@ if $@;

    # if the agent is disabled ignore the other potentially set properties
    return $agent->{enabled} ? $agent : {};
}
2134
# Look up a single property ($key) of the guest agent configuration. Returns undef if the
# config has no 'agent' option at all; otherwise whatever parse_guest_agent() holds for $key.
sub get_qga_key {
    my ($conf, $key) = @_;

    if (defined($conf->{agent})) {
        return parse_guest_agent($conf)->{$key};
    }

    # explicit undef (not a bare return) keeps the list-context result a one-element list
    return undef;
}
2142
# Parse a 'vga' property string against $vga_fmt. An empty/false value yields an empty hash.
# A parse error is only emitted as a warning; the result is then undefined.
sub parse_vga {
    my ($value) = @_;

    return {} if !$value;

    my $vga = eval { parse_property_string($vga_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $vga;
}
2151
# Parse an 'rng' property string against $rng_fmt. Returns nothing for an empty/false value.
# A parse error is only emitted as a warning; the result is then undefined.
sub parse_rng {
    my ($value) = @_;

    return if !$value;

    my $rng = eval { parse_property_string($rng_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $rng;
}
2161
# Parse a 'meta' property string against $meta_info_fmt. Returns nothing for an empty/false
# value. A parse error is only emitted as a warning; the result is then undefined.
sub parse_meta_info {
    my ($value) = @_;

    return if !$value;

    my $meta = eval { parse_property_string($meta_info_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $meta;
}
2171
# Build a fresh 'meta' property string containing the result of kvm_user_version() as
# 'creation-qemu' and the current epoch time (as string) as 'ctime'.
sub new_meta_info_string {
    my () = @_; # for now do not allow to override any value

    my $meta = {
        'creation-qemu' => kvm_user_version(),
        ctime => "". int(time()),
    };

    return PVE::JSONSchema::print_property_string($meta, $meta_info_fmt);
}
2183
# Return extra QEMU command line arguments (array ref) needed for VMs that always run the
# latest machine version but were created before a machine version transition which affected
# the virtual hardware such that, e.g., an OS config change would be required (we do not want
# to pin the machine version for non-windows OS types). Returns nothing if no fixup applies.
sub qemu_created_version_fixups {
    my ($conf, $forcemachine, $kvmver) = @_;

    my $meta = parse_meta_info($conf->{meta}) // {};
    my $forced_vers = PVE::QemuServer::Machine::extract_version($forcemachine);

    # only non-versioned machine types are affected
    my $machine = $conf->{machine};
    return if defined($machine) && $machine !~ m/^(?:pc|q35|virt)$/;

    # only VMs created before 6.1 (or with unknown creation version)
    my $creation_vers = $meta->{'creation-qemu'};
    return if defined($creation_vers) && min_version($creation_vers, 6, 1);

    # handle snapshot-rollback/migrations
    return if $forced_vers && !min_version($forced_vers, 6, 1);

    # only need to apply the change since 6.1
    return if !min_version($kvmver, 6, 1);

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    return if !($q35 && $conf->{ostype} && $conf->{ostype} eq 'l26');

    # this changed to default-on in QEMU 6.1 for q35 machines, it will mess with PCI slot view
    # and thus with the predictable interface naming of systemd
    return ['-global', 'ICH9-LPC.acpi-pci-hotplug-with-bridge-support=off'];
}
2208
PVE::JSONSchema::register_format('pve-qm-usb-device', \&verify_usb_device);

# Format verifier for 'pve-qm-usb-device': returns $value if parse_usb_device() accepts it.
# Otherwise it dies, unless $noerr is set, in which case nothing is returned.
sub verify_usb_device {
    my ($value, $noerr) = @_;

    if (parse_usb_device($value)) {
        return $value;
    }

    die "unable to parse usb device\n" if !$noerr;

    return;
}
2219
# Add the JSON schema properties for the create and set functions to $prop and return it.
#
# All options from $confdesc are copied, except for a fixed set of internal-only keys. If
# $with_disk_alloc is set, drive options take their schema from
# $PVE::QemuServer::Drive::drivedesc_hash_with_alloc instead.
sub json_config_properties {
    my ($prop, $with_disk_alloc) = @_;

    my %skip_opt = map { $_ => 1 } qw(parent snaptime vmstate runningmachine runningcpu meta);

    for my $opt (keys %$confdesc) {
        next if $skip_opt{$opt};

        my $use_alloc_schema = $with_disk_alloc && is_valid_drivename($opt);
        $prop->{$opt} = $use_alloc_schema
            ? $PVE::QemuServer::Drive::drivedesc_hash_with_alloc->{$opt}
            : $confdesc->{$opt};
    }

    return $prop;
}
2245
# Properties that we can read from an OVF file: one optional disk image property per valid
# drive name, plus 'cores', 'memory' and 'name'.
sub json_ovf_properties {
    my $prop = {
        map {
            my $device = $_;
            ($device => {
                type => 'string',
                format => 'pve-volume-id-or-absolute-path',
                description => "Disk image that gets imported to $device",
                optional => 1,
            })
        } PVE::QemuServer::Drive::valid_drive_names()
    };

    $prop->{cores} = {
        type => 'integer',
        description => "The number of CPU cores.",
        optional => 1,
    };
    $prop->{memory} = {
        type => 'integer',
        description => "Amount of RAM for the VM in MB.",
        optional => 1,
    };
    $prop->{name} = {
        type => 'string',
        description => "Name of the VM.",
        optional => 1,
    };

    return $prop;
}
2277
# Return a deep copy of $confdesc_cloudinit (used to generate documentation), so that callers
# cannot modify the original schema.
sub cloudinit_config_properties {
    my $copy = dclone($confdesc_cloudinit);
    return $copy;
}
2283
# Validate $value for config option $key according to the type declared in $confdesc and
# return the normalized value:
#   boolean - 1 or 0 (accepts 1/on/yes/true and 0/off/no/false, case-insensitive)
#   integer - int() of a string consisting of digits only
#   number  - the value itself if it is an unsigned decimal number
#   string  - the value itself after check_format() if the option declares a format,
#             otherwise the value with one pair of enclosing double quotes removed
# Dies for unknown keys, undefined values, values containing a line break, and values that
# fail the type check.
sub check_type {
    my ($key, $value) = @_;

    my $desc = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$desc;

    my $type = $desc->{type};

    die "got undefined value\n" if !defined($value);

    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    if ($type eq 'string') {
        if (my $fmt = $desc->{format}) {
            PVE::JSONSchema::check_format($fmt, $value);
            return $value;
        }
        $value =~ s/^\"(.*)\"$/$1/;
        return $value;
    }

    die "internal error"
}
2320
# Destroy the VM $vmid: free all volumes owned by it and remove (or replace) its config.
#
# Arguments:
#   $storecfg           - storage configuration
#   $vmid               - ID of the VM to destroy
#   $skiplock           - if set, the config lock is not checked
#   $replacement_conf   - if defined, written as new config instead of destroying the config
#   $purge_unreferenced - if set, also free every 'images' volume that vdisk_list() reports
#                         for $vmid, even if it is not referenced in the config
#
# Dies if the VM is a template with a base volume that is still used by a linked clone.
# Failures to free individual volumes only produce warnings.
sub destroy_vm {
    my ($storecfg, $vmid, $skiplock, $replacement_conf, $purge_unreferenced) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    PVE::QemuConfig->check_lock($conf) if !$skiplock;

    if ($conf->{template}) {
        # check if any base image is still used by a linked clone
        my $assert_base_unused = sub {
            my ($ds, $drive) = @_;
            return if drive_is_cdrom($drive);

            my $volid = $drive->{file};
            return if !$volid || $volid =~ m|^/|;

            die "base volume '$volid' is still in use by linked cloned\n"
                if PVE::Storage::volume_is_base_and_used($storecfg, $volid);
        };
        PVE::QemuConfig->foreach_volume_full($conf, { include_unused => 1 }, $assert_base_unused);
    }

    # volumes already handled, so that each one is freed at most once
    my $seen_volids = {};

    my $free_owned_volume = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;
        return if $seen_volids->{$volid};

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        $seen_volids->{$volid} = 1;
        eval { PVE::Storage::vdisk_free($storecfg, $volid) };
        warn "Could not remove disk '$volid', check manually: $@" if $@;
    };

    # only remove disks owned by this VM (referenced in the config)
    my $volume_opts = {
        include_unused => 1,
        extra_keys => ['vmstate'],
    };
    PVE::QemuConfig->foreach_volume_full($conf, $volume_opts, $free_owned_volume);

    # saved VM state volumes of snapshots
    for my $snap (values %{$conf->{snapshots}}) {
        my $vmstate = $snap->{vmstate};
        next if !defined($vmstate);

        my $drive = PVE::QemuConfig->parse_volume('vmstate', $vmstate, 1);
        next if !defined($drive);

        $free_owned_volume->('vmstate', $drive);
    }

    PVE::QemuConfig->foreach_volume_full($conf->{pending}, $volume_opts, $free_owned_volume);

    if ($purge_unreferenced) { # also remove unreferenced disk
        my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid, undef, 'images');
        my $free_volume = sub {
            my ($volid, $sid, $volname, $d) = @_;
            eval { PVE::Storage::vdisk_free($storecfg, $volid) };
            warn $@ if $@;
        };
        PVE::Storage::foreach_volid($vmdisks, $free_volume);
    }

    if (defined $replacement_conf) {
        PVE::QemuConfig->write_config($vmid, $replacement_conf);
    } else {
        PVE::QemuConfig->destroy_config($vmid);
    }
}
2391
# Parse the raw content of a VM config file into a nested hash.
#
# Arguments:
#   $filename - config path; must end in '/qemu-server/<vmid>.conf', the VMID is taken from it
#   $raw      - file content; nothing is returned if it is undefined
#   $strict   - if true, parse problems die() instead of just warn()
#
# Returns a hash holding the options of the main section at top level, plus:
#   digest    - SHA1 hex digest of $raw
#   pending   - options of the [PENDING] section
#   cloudinit - options of the [special:cloudinit] section
#   snapshots - one sub-hash per other [section], keyed by section name
#
# Comment lines ('#...') and 'description:' lines are collected and stored as 'description'
# of the section they appear in.
sub parse_vm_config {
    my ($filename, $raw, $strict) = @_;

    return if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
        pending => {},
        cloudinit => {},
    };

    my $handle_error = sub {
        my ($msg) = @_;

        if ($strict) {
            die $msg;
        } else {
            warn $msg;
        }
    };

    $filename =~ m|/qemu-server/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    my $conf = $res;
    my $descr;
    my $section = '';

    # Store the description collected so far in the section currently being parsed and reset
    # the collector. Must be called before $conf is switched to another section - otherwise
    # the description of the section that just ended is lost.
    my $finish_description = sub {
        if (defined($descr)) {
            $descr =~ s/\s+$//;
            $conf->{description} = $descr;
        }
        $descr = undef;
    };

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        if ($line =~ m/^\[PENDING\]\s*$/i) {
            $section = 'pending';
            $finish_description->();
            $conf = $res->{$section} = {};
            next;
        } elsif ($line =~ m/^\[special:cloudinit\]\s*$/i) {
            $section = 'cloudinit';
            # previously the collected description was dropped here without being stored
            $finish_description->();
            $conf = $res->{$section} = {};
            next;
        } elsif ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            $section = $1;
            $finish_description->();
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($2);
        # NOTE(review): unlike the other patterns this one is not anchored at the line start
        } elsif ($line =~ m/snapstate:\s*(prepare|delete)\s*$/) {
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            # 'args' is stored verbatim, without type check
            my $key = $1;
            my $value = $2;
            $conf->{$key} = $value;
        } elsif ($line =~ m/^delete:\s*(.*\S)\s*$/) {
            my $value = $1;
            if ($section eq 'pending') {
                $conf->{delete} = $value; # we parse this later
            } else {
                $handle_error->("vm $vmid - property 'delete' is only allowed in [PENDING]\n");
            }
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(.+?)\s*$/) {
            my $key = $1;
            my $value = $2;
            eval { $value = check_type($key, $value); };
            if ($@) {
                $handle_error->("vm $vmid - unable to parse value of '$key' - $@");
            } else {
                # legacy alias
                $key = 'ide2' if $key eq 'cdrom';
                my $fmt = $confdesc->{$key}->{format};
                if ($fmt && $fmt =~ /^pve-qm-(?:ide|scsi|virtio|sata)$/) {
                    # normalize the drive's file to what filename_to_volume_id() returns
                    my $v = parse_drive($key, $value);
                    if (my $volid = filename_to_volume_id($vmid, $v->{file}, $v->{media})) {
                        $v->{file} = $volid;
                        $value = print_drive($v);
                    } else {
                        $handle_error->("vm $vmid - unable to parse value of '$key'\n");
                        next;
                    }
                }

                $conf->{$key} = $value;
            }
        } else {
            $handle_error->("vm $vmid - unable to parse config: $line\n");
        }
    }

    # description of the last section in the file
    $finish_description->();

    delete $res->{snapstate}; # just to be sure

    return $res;
}
2510
# Serialize a VM config hash into the raw config file format and return it as string.
#
# $conf is normalized in place before serialization:
#   - 'snapstate' is removed from the main section
#   - 'cdrom' is turned into 'ide2' (dies if both are set)
#   - the obsolete 'smp' option is dropped or converted to 'sockets'
#   - all values are passed through check_type() (dies if one does not validate)
#   - 'unusedN' entries whose volume is referenced by a drive again are removed
# The output consists of the main section, followed by [PENDING], [special:cloudinit] (both
# only if non-empty) and one section per snapshot (sorted by name).
sub write_vm_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    if (my $cdrom = $conf->{cdrom}) {
        die "option ide2 conflicts with cdrom\n" if $conf->{ide2};
        $conf->{ide2} = $cdrom;
        delete $conf->{cdrom};
    }

    # we do not use 'smp' any longer
    if ($conf->{sockets}) {
        delete $conf->{smp};
    } elsif (my $smp = $conf->{smp}) {
        $conf->{sockets} = $smp;
        delete $conf->{cores};
        delete $conf->{smp};
    }

    # keys that are no config options and therefore not type checked
    my %meta_key = map { $_ => 1 } qw(digest description snapshots snapstate pending cloudinit);

    # volumes referenced by drives outside of snapshots
    my $volids_in_use = {};

    # type-check and normalize all options of one section in place
    my $sanitize_section = sub {
        my ($section, $is_pending, $snapname) = @_;

        for my $key (keys %$section) {
            next if $meta_key{$key};

            if ($key eq 'delete') {
                die "propertry 'delete' is only allowed in [PENDING]\n"
                    if !$is_pending;
                # fixme: check syntax?
                next;
            }

            my $value = eval { check_type($key, $section->{$key}) };
            die "unable to parse value of '$key' - $@" if $@;

            $section->{$key} = $value;

            if (!$snapname && is_valid_drivename($key)) {
                my $drive = parse_drive($key, $value);
                $volids_in_use->{$drive->{file}} = 1 if $drive && $drive->{file};
            }
        }
    };

    $sanitize_section->($conf);
    $sanitize_section->($conf->{pending}, 1);
    $sanitize_section->($conf->{cloudinit});

    for my $snapname (keys %{$conf->{snapshots}}) {
        die "internal error: snapshot name '$snapname' is forbidden" if lc($snapname) eq 'pending';
        $sanitize_section->($conf->{snapshots}->{$snapname}, undef, $snapname);
    }

    # remove 'unusedX' settings if we re-add a volume
    for my $key (grep { m/^unused/ } keys %$conf) {
        delete $conf->{$key} if $volids_in_use->{$conf->{$key}};
    }

    # render one section (without its header line)
    my $render_section = sub {
        my ($section, $is_pending) = @_;

        my $out = '';

        # add description as comment to top of file
        if (defined(my $descr = $section->{description})) {
            if ($descr) {
                $out .= '#' . PVE::Tools::encode_text($_) . "\n" for split(/\n/, $descr);
            } elsif ($is_pending) {
                $out .= "#\n";
            }
        }

        for my $key (sort keys %$section) {
            next if $key =~ /^(digest|description|pending|cloudinit|snapshots)$/;
            $out .= "$key: $section->{$key}\n";
        }

        return $out;
    };

    my $raw = $render_section->($conf);

    if (scalar(keys %{$conf->{pending}})) {
        $raw .= "\n[PENDING]\n";
        $raw .= $render_section->($conf->{pending}, 1);
    }

    if (scalar(keys %{$conf->{cloudinit}})) {
        $raw .= "\n[special:cloudinit]\n";
        $raw .= $render_section->($conf->{cloudinit});
    }

    for my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n";
        $raw .= $render_section->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
2619
# Return a hash with the default value of every option in $confdesc that declares one.
# (We use static defaults from our JSON schema configuration.)
sub load_defaults {
    my $defaults = {};

    for my $key (keys %$confdesc) {
        my $default = $confdesc->{$key}->{default};
        $defaults->{$key} = $default if defined($default);
    }

    return $defaults;
}
2633
# Return a hash { $vmid => { exists => 1 } } for every entry of the cluster-wide VM list that
# has type 'qemu' and is located on the node returned by nodename().
sub config_list {
    my $vmlist = PVE::Cluster::get_vmlist();

    my $res = {};
    return $res if !$vmlist || !$vmlist->{ids};

    my $ids = $vmlist->{ids};
    my $nodename = nodename();

    for my $vmid (keys %$ids) {
        my $entry = $ids->{$vmid};

        next unless $entry->{node} && $entry->{node} eq $nodename;
        next unless $entry->{type} && $entry->{type} eq 'qemu';

        $res->{$vmid}->{exists} = 1;
    }

    return $res;
}
2649
# Test if a VM uses local resources (to prevent migration).
#
# Returns an array reference with the names of all config keys that count as local resource:
# the old-syntax 'hostusb'/'hostpci' options, 'ivshmem', and every usbN, hostpciN, serialN or
# parallelN entry - except USB entries configured as 'spice' and serial ports configured as
# 'socket'. Dies if any local resource is found, unless $noerr is set.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    # 'hostusb' and 'hostpci' are old syntax
    my @local = grep { $conf->{$_} } qw(hostusb hostpci ivshmem);

    for my $key (keys %$conf) {
        my $value = $conf->{$key};

        next if $key =~ m/^usb/ && ($value =~ m/^spice(?![^,])/);
        # sockets are safe: they will be recreated on the target side post-migrate
        next if $key =~ m/^serial/ && ($value eq 'socket');

        push @local, $key if $key =~ m/^(usb|hostpci|serial|parallel)\d+$/;
    }

    if (@local && !$noerr) {
        die "VM uses local resources\n";
    }

    return \@local;
}
2672
# Check if the storages used by the VM's volumes are available on both the local node and
# $node (used by migrate). Dies (from within the PVE::Storage helpers, or here) if a storage is
# not enabled or does not have the content type of a volume configured. Volumes without a
# storage ID are skipped.
sub check_storage_availability {
    my ($storecfg, $conf, $node) = @_;

    my $assert_volume_available = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        # check if storage is available on both nodes
        my $scfg = PVE::Storage::storage_check_enabled($storecfg, $sid);
        PVE::Storage::storage_check_enabled($storecfg, $sid, $node);

        my ($vtype) = PVE::Storage::parse_volname($storecfg, $volid);

        die "$volid: content type '$vtype' is not available on storage '$sid'\n"
            if !$scfg->{content}->{$vtype};
    };

    PVE::QemuConfig->foreach_volume($conf, $assert_volume_available);
}
2696
# List nodes where all VM images are available (used by has_feature API).
#
# Starts with all cluster nodes and, for each volume with a storage ID, removes the nodes on
# which its storage is not usable: all nodes if the storage is disabled, the nodes not listed
# in the storage's 'nodes' restriction, or every node but the local one if the storage is not
# shared. Returns a hash { $node => 1 } of the remaining nodes.
sub shared_nodes {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => 1 } @$nodelist };
    my $nodename = nodename();

    my $restrict_to_volume_nodes = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        if ($scfg->{disable}) {
            $nodehash = {};
        } elsif (my $avail = $scfg->{nodes}) {
            for my $node (keys %$nodehash) {
                delete $nodehash->{$node} if !$avail->{$node};
            }
        } elsif (!$scfg->{shared}) {
            for my $node (keys %$nodehash) {
                delete $nodehash->{$node} if $node ne $nodename
            }
        }
    };

    PVE::QemuConfig->foreach_volume($conf, $restrict_to_volume_nodes);

    return $nodehash
}
2730
# Determine, per cluster node, which storages used by the VM's volumes are not available.
#
# Returns a hash keyed by node name. A node's value is an empty hash if all storages are
# available there, otherwise it has the key 'unavailable_storages' with a sorted list of
# storage IDs that are disabled or restricted to other nodes.
sub check_local_storage_availability {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => {} } @$nodelist };

    my $record_unavailable = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        # nodes on which this storage cannot be used
        my @lacking_nodes;
        if ($scfg->{disable}) {
            @lacking_nodes = keys %$nodehash;
        } elsif (my $avail = $scfg->{nodes}) {
            @lacking_nodes = grep { !$avail->{$_} } keys %$nodehash;
        }

        $nodehash->{$_}->{unavailable_storages}->{$storeid} = 1 for @lacking_nodes;
    };

    PVE::QemuConfig->foreach_volume($conf, $record_unavailable);

    # turn the collected storage ID sets into sorted lists
    for my $node_info (values %$nodehash) {
        my $unavail = $node_info->{unavailable_storages};
        $node_info->{unavailable_storages} = [ sort keys %$unavail ] if $unavail;
    }

    return $nodehash
}
2769
# Compat only, use assert_config_exists_on_node and vm_running_locally where possible.
#
# Unless $nocheck is set, first asserts that the config of $vmid exists on $node. Returns the
# result of PVE::QemuServer::Helpers::vm_running_locally() (used as PID by callers here).
sub check_running {
    my ($vmid, $nocheck, $node) = @_;

    if (!$nocheck) {
        PVE::QemuConfig::assert_config_exists_on_node($vmid, $node);
    }

    return PVE::QemuServer::Helpers::vm_running_locally($vmid);
}
2777
# Return the hash from config_list(), with the 'pid' of each VM added for which a
# '<vmid>.pid' file exists in the run directory and check_running() returns a PID.
# If the run directory cannot be opened, the plain config list is returned.
sub vzlist {
    my $vzlist = config_list();

    my $rundir = IO::Dir->new($PVE::QemuServer::Helpers::var_run_tmpdir);
    return $vzlist if !$rundir;

    while (defined(my $entry = $rundir->read)) {
        next if $entry !~ m/^(\d+)\.pid$/;
        my $vmid = $1;

        # ignore PID files of VMs that are not configured on this node
        next if !defined($vzlist->{$vmid});

        my $pid = check_running($vmid);
        $vzlist->{$vmid}->{pid} = $pid if $pid;
    }

    return $vzlist;
}
2795
# JSON schema describing properties of a VM status entry. Everything except 'vmid' and
# 'status' is optional.
our $vmstatus_return_properties = {
    vmid => get_standard_option('pve-vmid'),
    status => {
        type => 'string',
        enum => [qw(stopped running)],
        description => "Qemu process status.",
    },
    maxmem => {
        type => 'integer',
        renderer => 'bytes',
        optional => 1,
        description => "Maximum memory in bytes.",
    },
    maxdisk => {
        type => 'integer',
        renderer => 'bytes',
        optional => 1,
        description => "Root disk size in bytes.",
    },
    name => {
        type => 'string',
        optional => 1,
        description => "VM name.",
    },
    qmpstatus => {
        type => 'string',
        optional => 1,
        description => "Qemu QMP agent status.",
    },
    pid => {
        type => 'integer',
        optional => 1,
        description => "PID of running qemu process.",
    },
    uptime => {
        type => 'integer',
        renderer => 'duration',
        optional => 1,
        description => "Uptime.",
    },
    cpus => {
        type => 'number',
        optional => 1,
        description => "Maximum usable CPUs.",
    },
    lock => {
        type => 'string',
        optional => 1,
        description => "The current config lock, if any.",
    },
    tags => {
        type => 'string',
        optional => 1,
        description => "The current configured tags, if any",
    },
    'running-machine' => {
        type => 'string',
        optional => 1,
        description => "The currently running machine type (if running).",
    },
    'running-qemu' => {
        type => 'string',
        optional => 1,
        description => "The currently running QEMU version (if running).",
    },
};
2862
# CPU accounting sample of the previous vmstatus() call, keyed by PID
# ({ time, used, cpu }); needed to compute the CPU usage between two calls.
my $last_proc_pid_stat;

# Get VM status information.
#
# This must be fast and should not block ($full == false).
# We only query KVM using QMP if $full == true (this can be slow).
#
# Arguments:
#   $opt_vmid - if set, only this VM is included in the result
#   $full     - if true, additionally query the running VMs via QMP (status, block statistics,
#               running machine, QEMU version, balloon info, 'query-proxmox-support') and
#               include per-NIC traffic counters
#
# Returns a hash keyed by VMID with one status hash per VM.
sub vmstatus {
    my ($opt_vmid, $full) = @_;

    my $res = {};

    my $storecfg = PVE::Storage::config();

    my $list = vzlist();
    my $defaults = load_defaults();

    my ($uptime) = PVE::ProcFSTools::read_proc_uptime(1);

    my $cpucount = $cpuinfo->{cpus} || 1;

    my $mib = 1024 * 1024;

    # static information from the VM configs
    for my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);

        my $conf = PVE::QemuConfig->load_config($vmid);
        my $pid = $list->{$vmid}->{pid};

        my $d = {
            vmid => int($vmid),
            # fixme: better status?
            status => $pid ? 'running' : 'stopped',
            disk => 0, # no info available
            # dynamic values, filled in below for running VMs
            uptime => 0,
            cpu => 0,
            mem => 0,
            netout => 0,
            netin => 0,
            diskread => 0,
            diskwrite => 0,
        };
        $d->{pid} = int($pid) if $pid;

        my $size = PVE::QemuServer::Drive::bootdisk_size($storecfg, $conf);
        $d->{maxdisk} = defined($size) ? $size : 0;

        my $cpus = ($conf->{sockets} || $defaults->{sockets})
            * ($conf->{cores} || $defaults->{cores});
        $cpus = $cpucount if $cpus > $cpucount;
        $cpus = $conf->{vcpus} if $conf->{vcpus};
        $d->{cpus} = $cpus;

        $d->{name} = $conf->{name} || "VM $vmid";
        $d->{maxmem} = ($conf->{memory} ? $conf->{memory} : $defaults->{memory}) * $mib;

        if ($conf->{balloon}) {
            $d->{balloon_min} = $conf->{balloon} * $mib;
            $d->{shares} = $conf->{shares} // $defaults->{shares};
        }

        $d->{template} = 1 if PVE::QemuConfig->is_template($conf);

        $d->{serial} = 1 if conf_has_serial($conf);
        $d->{lock} = $conf->{lock} if $conf->{lock};
        $d->{tags} = $conf->{tags} if defined($conf->{tags});

        $res->{$vmid} = $d;
    }

    # network traffic, accounted via the tap<vmid>i<N> devices; the device's 'receive'
    # counter is added to the VM's 'netout' and 'transmit' to 'netin'
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    for my $dev (keys %$netdev) {
        next if $dev !~ m/^tap([1-9]\d*)i/;
        my $vmid = $1;

        my $d = $res->{$vmid};
        next if !$d;

        my $counters = $netdev->{$dev};

        $d->{netout} += $counters->{receive};
        $d->{netin} += $counters->{transmit};

        if ($full) {
            $d->{nics}->{$dev}->{netout} = int($counters->{receive});
            $d->{nics}->{$dev}->{netin} = int($counters->{transmit});
        }
    }

    my $ctime = gettimeofday;

    # uptime, memory and CPU usage from /proc/<pid>/stat
    for my $vmid (keys %$list) {
        my $d = $res->{$vmid};
        my $pid = $d->{pid};
        next if !$pid;

        my $pstat = PVE::ProcFSTools::read_proc_pid_stat($pid);
        next if !$pstat; # not running

        my $used = $pstat->{utime} + $pstat->{stime};

        $d->{uptime} = int(($uptime - $pstat->{starttime}) / $cpuinfo->{user_hz});

        if ($pstat->{vsize}) {
            $d->{mem} = int(($pstat->{rss} / $pstat->{vsize}) * $d->{maxmem});
        }

        my $old = $last_proc_pid_stat->{$pid};
        if (!$old) {
            # first sample for this PID, no usage can be computed yet
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($ctime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = (($dutime / $dtime) * $cpucount) / $d->{cpus};
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => $d->{cpu},
            };
        } else {
            # interval too short, re-use the last computed value
            $d->{cpu} = $old->{cpu};
        }
    }

    return $res if !$full;

    my $qmpclient = PVE::QMPClient->new();

    my $ballooncb = sub {
        my ($vmid, $resp) = @_;

        my $info = $resp->{'return'};
        return if !$info->{max_mem};

        my $d = $res->{$vmid};

        # use memory assigned to VM
        $d->{maxmem} = $info->{max_mem};
        $d->{balloon} = $info->{actual};

        if (defined($info->{total_mem}) && defined($info->{free_mem})) {
            $d->{mem} = $info->{total_mem} - $info->{free_mem};
            $d->{freemem} = $info->{free_mem};
        }

        $d->{ballooninfo} = $info;
    };

    my $blockstatscb = sub {
        my ($vmid, $resp) = @_;

        my $data = $resp->{'return'} || [];
        my ($rd_total, $wr_total) = (0, 0);

        for my $blockstat (@$data) {
            my $stats = $blockstat->{stats};
            $rd_total += $stats->{rd_bytes};
            $wr_total += $stats->{wr_bytes};

            $blockstat->{device} =~ s/drive-//;
            $res->{$vmid}->{blockstat}->{$blockstat->{device}} = $stats;
        }
        $res->{$vmid}->{diskread} = $rd_total;
        $res->{$vmid}->{diskwrite} = $wr_total;
    };

    my $machinecb = sub {
        my ($vmid, $resp) = @_;

        my $data = $resp->{'return'} || [];

        $res->{$vmid}->{'running-machine'} =
            PVE::QemuServer::Machine::current_from_query_machines($data);
    };

    my $versioncb = sub {
        my ($vmid, $resp) = @_;

        my $data = $resp->{'return'} // {};
        my $version = 'unknown';

        if (my $v = $data->{qemu}) {
            $version = $v->{major} . "." . $v->{minor} . "." . $v->{micro};
        }

        $res->{$vmid}->{'running-qemu'} = $version;
    };

    my $statuscb = sub {
        my ($vmid, $resp) = @_;

        $qmpclient->queue_cmd($vmid, $blockstatscb, 'query-blockstats');
        $qmpclient->queue_cmd($vmid, $machinecb, 'query-machines');
        $qmpclient->queue_cmd($vmid, $versioncb, 'query-version');
        # this fails if the balloon driver is not loaded, so this must be
        # the last command (following commands are aborted if this fails).
        $qmpclient->queue_cmd($vmid, $ballooncb, 'query-balloon');

        my $status = $resp->{'return'}->{status};
        if (!defined($status)) {
            warn "unable to get VM status\n";
            return;
        }

        $res->{$vmid}->{qmpstatus} = $status;
    };

    for my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        next if !$res->{$vmid}->{pid}; # not running
        $qmpclient->queue_cmd($vmid, $statuscb, 'query-status');
    }

    $qmpclient->queue_execute(undef, 2);

    for my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        next if !$res->{$vmid}->{pid}; # not running

        # we can't use the $qmpclient since it might have already aborted on
        # 'query-balloon', but this might also fail for older versions...
        my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };
        $res->{$vmid}->{'proxmox-support'} = $qemu_support // {};
    }

    # fall back to the process status where QMP did not provide one
    for my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        my $d = $res->{$vmid};
        $d->{qmpstatus} = $d->{status} if !$d->{qmpstatus};
    }

    return $res;
}
3103
# Returns 1 if any of the 'serial0' .. 'serial<$MAX_SERIAL_PORTS - 1>' options is set to a
# true value in $conf, 0 otherwise.
sub conf_has_serial {
    my ($conf) = @_;

    for my $i (0 .. $MAX_SERIAL_PORTS - 1) {
        return 1 if $conf->{"serial$i"};
    }

    return 0;
}
3115
# Look up the audio device 'audio$id' ($id defaults to 0) in $conf.
#
# Returns nothing if it is not configured; otherwise a hash with the configured device
# ('dev'), the backend driver ('backend', defaulting to 'spice') and the IDs derived from
# $id for the device and its backend ('dev_id', 'backend_id'). parse_property_string() is
# not wrapped in eval here, so an invalid property string makes this die.
sub conf_has_audio {
    my ($conf, $id) = @_;

    $id //= 0;

    my $audio = $conf->{"audio$id"};
    return if !defined($audio);

    my $props = parse_property_string($audio_fmt, $audio);
    my $backend = $props->{driver} // 'spice';

    return {
        dev => $props->{device},
        dev_id => "audiodev$id",
        backend => $backend,
        backend_id => "$backend-backend${id}",
    };
}
3133
# Build the QEMU command line arguments for an audio device.
#
# Arguments:
#   $audio           - hash with 'dev', 'dev_id', 'backend' and 'backend_id'
#   $audiopciaddr    - string appended verbatim to the device definition
#   $machine_version - machine version; from 4.2 on the devices reference the backend via
#                      an explicit 'audiodev=' property
#
# Returns an array reference with the '-device' and '-audiodev' arguments. Dies for devices
# other than 'AC97' and those ending in 'intel-hda'.
sub audio_devs {
    my ($audio, $audiopciaddr, $machine_version) = @_;

    my $dev = $audio->{dev};
    my $id = $audio->{dev_id};

    my $audiodev = min_version($machine_version, 4, 2)
        ? ",audiodev=$audio->{backend_id}"
        : "";

    my @devs;
    if ($dev eq 'AC97') {
        push @devs, '-device', "AC97,id=${id}${audiopciaddr}$audiodev";
    } elsif ($dev =~ /intel\-hda$/) {
        # the HDA controller itself plus two codecs attached to its bus
        push @devs,
            '-device', "$dev,id=${id}${audiopciaddr}",
            '-device', "hda-micro,id=${id}-codec0,bus=${id}.0,cad=0$audiodev",
            '-device', "hda-duplex,id=${id}-codec1,bus=${id}.0,cad=1$audiodev";
    } else {
        die "unkown audio device '$dev', implement me!";
    }

    push @devs, '-audiodev', "$audio->{backend},id=$audio->{backend_id}";

    return \@devs;
}
3159
# Returns a hash reference with the runtime file paths used for the swtpm
# instance of the given VM:
#   socket - path of the swtpm socket
#   pid    - path of the swtpm PID file (socket path plus '.pid')
sub get_tpm_paths {
    my ($vmid) = @_;

    my $socket_path = "/var/run/qemu-server/$vmid.swtpm";

    my %paths = (
        socket => $socket_path,
        pid => "$socket_path.pid",
    );

    return \%paths;
}
3167
# Appends the QEMU arguments for a TPM device to @$devices.
#
# Does nothing unless the VM config has a 'tpmstate0' entry. Otherwise adds a
# socket chardev pointing at the VM's swtpm socket, an emulator tpmdev using
# that chardev and a 'tpm-tis' device using the tpmdev.
sub add_tpm_device {
    my ($vmid, $devices, $conf) = @_;

    return if !$conf->{tpmstate0};

    my $socket = get_tpm_paths($vmid)->{socket};

    push @$devices, (
        "-chardev", "socket,id=tpmchar,path=$socket",
        "-tpmdev", "emulator,id=tpmdev,chardev=tpmchar",
        "-device", "tpm-tis,tpmdev=tpmdev",
    );
}
3179
# Starts the swtpm daemon that emulates the TPM of a VM.
#
# Parameters:
#   $storecfg  - storage configuration, used to map the state volume
#   $vmid      - ID of the VM, determines the socket and PID file paths
#   $tpmdrive  - the 'tpmstate0' drive string; nothing is done if it is false
#   $migration - if true, 'swtpm_setup' is skipped, because the state will be
#                received from the remote side during migration
#
# Returns the (untainted) PID of the swtpm daemon, so that the caller can kill
# it on error, or undef if no PID could be parsed from the PID file. Returns
# nothing if no TPM drive was passed.
# Dies if the PID file does not show up after 100 polls, 50ms apart.
sub start_swtpm {
    my ($storecfg, $vmid, $tpmdrive, $migration) = @_;

    return if !$tpmdrive;

    my $tpm = parse_drive("tpmstate0", $tpmdrive);

    # storage-managed volumes need to be mapped, anything else is used as path directly
    my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
    my $state = $storeid
        ? PVE::Storage::map_volume($storecfg, $tpm->{file})
        : $tpm->{file};

    my $is_tpm2 = ($tpm->{version} // '') eq 'v2.0';

    my $paths = get_tpm_paths($vmid);

    # during migration, we will get state from remote
    if (!$migration) {
        # run swtpm_setup to create a new TPM state if it doesn't exist yet
        my $setup_cmd = [
            "swtpm_setup",
            "--tpmstate",
            "file://$state",
            "--createek",
            "--create-ek-cert",
            "--create-platform-cert",
            "--lock-nvram",
            "--config",
            "/etc/swtpm_setup.conf", # do not use XDG configs
            "--runas",
            "0", # force creation as root, error if not possible
            "--not-overwrite", # ignore existing state, do not modify
        ];

        # TPM 2.0 supports ECC crypto, use if possible
        push @$setup_cmd, "--tpm2", "--ecc" if $is_tpm2;

        # the output line is handed to the callback as its argument, do not rely on a
        # capture variable that happens to be set somewhere up the call chain
        run_command($setup_cmd, outfunc => sub {
            my ($line) = @_;
            print "swtpm_setup: $line\n";
        });
    }

    my $emulator_cmd = [
        "swtpm",
        "socket",
        "--tpmstate",
        "backend-uri=file://$state,mode=0600",
        "--ctrl",
        "type=unixio,path=$paths->{socket},mode=0600",
        "--pid",
        "file=$paths->{pid}",
        "--terminate", # terminate on QEMU disconnect
        "--daemon",
    ];
    push @$emulator_cmd, "--tpm2" if $is_tpm2;

    run_command($emulator_cmd, outfunc => sub {
        my ($line) = @_;
        print "$line\n";
    });

    my $tries = 100; # swtpm may take a bit to start before daemonizing, wait up to 5s for pid
    while (! -e $paths->{pid}) {
        die "failed to start swtpm: pid file '$paths->{pid}' wasn't created.\n" if --$tries == 0;
        usleep(50_000);
    }

    # return untainted PID of swtpm daemon so it can be killed on error; take the capture
    # from the match result, so that a failed match can never yield a stale $1
    my $pid_line = file_read_firstline($paths->{pid}) // '';
    my ($pid) = $pid_line =~ m/(\d+)/;

    return $pid;
}
3249
# Checks whether a 'vga' option string selects a QXL display.
#
# Returns 0 if the parsed type is unset or not one of 'qxl', 'qxl2', 'qxl3' or
# 'qxl4'. Otherwise returns the digit following 'qxl', or 1 for plain 'qxl'.
sub vga_conf_has_spice {
    my ($vga) = @_;

    my $vgaconf = parse_vga($vga);
    my $type = $vgaconf->{type};

    return 0 if !$type;

    if ($type =~ m/^qxl([234])?$/) {
        return $1 || 1;
    }

    return 0;
}
3259
# Returns true if $arch equals the host architecture as reported by get_host_arch().
sub is_native($) {
    my ($arch) = @_;

    my $host_arch = get_host_arch();

    return $host_arch eq $arch;
}
3264
# Returns the architecture of a VM: the 'arch' value of its config if that is
# defined, the host architecture otherwise.
sub get_vm_arch {
    my ($conf) = @_;

    my $arch = $conf->{arch};

    return defined($arch) ? $arch : get_host_arch();
}
3269
# Machine type to fall back to for each known guest architecture.
my $default_machines = {
    aarch64 => 'virt',
    x86_64 => 'pc',
};
3274
# Returns the leading "<major>.<minor>" part of a QEMU version string.
#
# Parameters:
#   $kvmversion - version string to parse; if undefined, the result of
#                 kvm_user_version() is used instead
#
# Returns undef if the version does not start with "<digits>.<digits>".
sub get_installed_machine_version {
    my ($kvmversion) = @_;

    $kvmversion = kvm_user_version() if !defined($kvmversion);

    # take the capture from the match result instead of $1: after a failed match $1 would
    # still hold the capture of some earlier, unrelated match
    my ($major_minor) = ($kvmversion // '') =~ m/^(\d+\.\d+)/;

    return $major_minor;
}
3281
# Turns an unversioned machine type into a version-pinned one.
#
# Parameters:
#   $machine      - 'pc', 'q35', 'virt' or a false value (treated like 'pc')
#   $base_version - version to pin to; the installed machine version is used
#                   instead if this is undefined or cannot be run according to
#                   PVE::QemuServer::Machine::can_run_pve_machine_version
#   $kvmversion   - QEMU version passed on to the helpers
#
# Returns the pinned machine type, e.g. "pc-i440fx-<version>". Any other
# machine type triggers a warning and is returned unchanged.
sub windows_get_pinned_machine_version {
    my ($machine, $base_version, $kvmversion) = @_;

    my $base_usable = defined($base_version)
        && PVE::QemuServer::Machine::can_run_pve_machine_version($base_version, $kvmversion);

    my $pin_version = $base_usable
        ? $base_version
        : get_installed_machine_version($kvmversion);

    my %prefix_for = (
        pc => 'pc-i440fx',
        q35 => 'pc-q35',
        virt => 'virt',
    );

    my $prefix = $prefix_for{ $machine || 'pc' };
    if (defined($prefix)) {
        $machine = "$prefix-$pin_version";
    } else {
        warn "unknown machine type '$machine', not touching that!\n";
    }

    return $machine;
}
3303
# Determines the machine type to use for a VM.
#
# Parameters:
#   $conf            - VM config ('machine' and 'ostype' are read)
#   $forcemachine    - if true, takes precedence over the configured machine
#   $arch            - guest architecture, 'x86_64' if undefined
#   $add_pve_version - if true, make sure the result carries a '+pveX' suffix
#   $kvmversion      - QEMU version, queried via kvm_user_version() if it is
#                      needed and undefined
#
# Returns the machine type string.
sub get_vm_machine {
    my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_;

    my $machine = $forcemachine || $conf->{machine};

    my $is_unversioned = !$machine || $machine =~ m/^(?:pc|q35|virt)$/;
    if ($is_unversioned) {
        $kvmversion = kvm_user_version() if !defined($kvmversion);

        # Windows VMs without a specific version have to be pinned to 5.1: 5.2 fixed a bug in
        # the ACPI layout, which confuses Windows quite a bit and may cause regressions, see
        # https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg08484.html
        if (windows_version($conf->{ostype})) {
            $machine = windows_get_pinned_machine_version($machine, '5.1', $kvmversion);
        }

        $arch = 'x86_64' if !defined($arch);
        $machine = $default_machines->{$arch} if !$machine;

        if ($add_pve_version) {
            $machine .= '+pve' . PVE::QemuServer::Machine::get_pve_version($kvmversion);
        }
    }

    if ($add_pve_version && $machine !~ m/\+pve\d+?(?:\.pxe)?$/) {
        # version-pinned machines without a pve-version (e.g. pc-q35-4.1) are assumed to be
        # '+pve0', so that they stay stable in case we bump; a '.pxe' suffix has to stay last
        if ($machine =~ m/^(.*?)\.pxe$/) {
            $machine = "$1+pve0.pxe";
        } else {
            $machine .= '+pve0';
        }
    }

    return $machine;
}
3338
# Selects the OVMF firmware image set for a VM.
#
# Parameters:
#   $arch    - guest architecture, used as key into $OVMF
#   $efidisk - parsed EFI disk (may be undefined); 'efitype' and
#              'pre-enrolled-keys' are read
#   $smm     - whether to use the SMM variant of the '4m' images
#
# The image type is 'default', unless the EFI disk has efitype '4m': then it
# is '4m' or '4m-no-smm' depending on $smm, with '-ms' appended if the disk
# has pre-enrolled keys.
#
# Returns the list stored for that type in $OVMF.
# Dies if no images are known for the architecture or for the selected type.
sub get_ovmf_files($$$) {
    my ($arch, $efidisk, $smm) = @_;

    my $types = $OVMF->{$arch}
        or die "no OVMF images known for architecture '$arch'\n";

    my $type = 'default';
    if (defined($efidisk->{efitype}) && $efidisk->{efitype} eq '4m') {
        $type = $smm ? "4m" : "4m-no-smm";
        $type .= '-ms' if $efidisk->{'pre-enrolled-keys'};
    }

    # fail with a meaningful message instead of dereferencing undef below
    my $files = $types->{$type}
        or die "no OVMF images of type '$type' known for architecture '$arch'\n";

    return $files->@*;
}
3353
# QEMU system emulator binary to use for each architecture that can be emulated.
my $Arch2Qemu = {
    x86_64 => '/usr/bin/qemu-system-x86_64',
    aarch64 => '/usr/bin/qemu-system-aarch64',
};

# Returns the path of the binary used to run a guest of the given architecture:
# '/usr/bin/kvm' if it is the native one, the matching entry of $Arch2Qemu
# otherwise. Dies if there is no such entry.
sub get_command_for_arch($) {
    my ($arch) = @_;

    if (is_native($arch)) {
        return '/usr/bin/kvm';
    }

    my $cmd = $Arch2Qemu->{$arch};
    die "don't know how to emulate architecture '$arch'\n" if !$cmd;

    return $cmd;
}
3366
# Queries the CPU flags supported on this host, separately for KVM and TCG.
#
# To get flags usable in a QEMU command line (-cpu element), array_intersect
# the result of this function with the one of query_understood_cpu_flags first.
# This is required, because:
#
# a) query_understood_cpu_flags returns flags the host cannot use and
# b) the QMP call used here does not actually return CPU flags, but CPU
#    settings - with most of them being flags. Those settings (and, curiously,
#    some flags) cannot be specified as a "-cpu" argument.
#
# This function needs to start up to 2 temporary VMs and is therefore rather
# expensive. The same value can be obtained much cheaper from pmxcfs, using
# PVE::Cluster::get_node_kv('cpuflags-$accel') with $accel being 'kvm' or 'tcg'.
#
# pvestatd calls this function on startup and whenever the QEMU/KVM version
# changes, automatically populating pmxcfs.
#
# Returns: { kvm => [ flagX, flagY, ... ], tcg => [ flag1, flag2, ... ] }
# since kvm and tcg machines support different flags. The 'kvm' entry is only
# set if KVM is available. A failed query only triggers a warning.
#
sub query_supported_cpu_flags {
    my ($arch) = @_;

    $arch = get_host_arch() if !defined($arch);
    my $default_machine = $default_machines->{$arch};

    my $flags = {};

    # FIXME: Once this is merged, the code below should work for ARM as well:
    # https://lists.nongnu.org/archive/html/qemu-devel/2019-06/msg04947.html
    if ($arch eq "aarch64") {
        die "QEMU/KVM cannot detect CPU flags on ARM (aarch64)\n";
    }

    my $kvm_supported = defined(kvm_version());
    my $qemu_cmd = get_command_for_arch($arch);
    my $fakevmid = -1;
    my $pidfile = PVE::QemuServer::Helpers::pidfile_name($fakevmid);

    # Starts a temporary (frozen) VM with vmid -1, asks it for the expansion of the 'host'
    # CPU model via QMP and stops it again. Returns the sorted list of enabled properties.
    my $probe_flags = sub {
        my ($kvm) = @_;

        my $cmd = [
            $qemu_cmd,
            '-machine', $default_machine,
            '-display', 'none',
            '-chardev', "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server=on,wait=off",
            '-mon', 'chardev=qmp,mode=control',
            '-pidfile', $pidfile,
            '-S', '-daemonize',
        ];
        push @$cmd, '-accel', 'tcg' if !$kvm;

        my $rc = run_command($cmd, noerr => 1, quiet => 0);
        die "QEMU flag querying VM exited with code $rc" if $rc;

        my %enabled;
        eval {
            my $expansion = mon_cmd(
                $fakevmid,
                'query-cpu-model-expansion',
                type => 'full',
                model => { name => 'host' }
            );

            my $props = $expansion->{model}->{props};
            for my $prop (keys %$props) {
                next unless $props->{$prop} eq '1';
                # QEMU returns some flags multiple times, with '_', '.' or '-'
                # (e.g. lahf_lm and lahf-lm; sse4.2, sse4-2 and sse4_2; ...).
                # Normalize all of them to underscores, to match /proc/cpuinfo
                (my $flag = $prop) =~ s/[.-]/_/g;
                $enabled{$flag} = 1;
            }
        };
        my $err = $@;

        # force stop with 10 sec timeout and 'nocheck', always stop, even if QMP failed
        vm_stop(undef, $fakevmid, 1, 1, 10, 0, 1);

        die $err if $err;

        return [ sort keys %enabled ];
    };

    # QEMU has to be queried once per accelerator, since KVM and TCG support different flags
    my @accels = ('tcg', $kvm_supported ? ('kvm') : ());

    PVE::QemuConfig->lock_config($fakevmid, sub {
        for my $accel (@accels) {
            my $use_kvm = $accel eq 'kvm' ? 1 : 0;
            $flags->{$accel} = eval { $probe_flags->($use_kvm) };
            warn "warning: failed querying supported $accel flags: $@\n" if $@;
        }
    });

    return $flags;
}
3468
# Directory holding the lists of understood CPU flags, which are written to a
# file at 'pve-qemu' compile time
my $understood_cpu_flag_dir = "/usr/share/kvm";

# Returns an array reference with the whitespace separated entries of the
# 'recognized-CPUID-flags-<arch>' file for the host architecture.
# Dies if that file does not exist.
sub query_understood_cpu_flags {
    my $arch = get_host_arch();
    my $filepath = "$understood_cpu_flag_dir/recognized-CPUID-flags-$arch";

    if (! -e $filepath) {
        die "Cannot query understood QEMU CPU flags for architecture: $arch (file not found)\n";
    }

    my $content = file_get_contents($filepath);

    # trim, so that splitting cannot produce an empty leading entry
    $content =~ s/^\s+//;
    $content =~ s/\s+$//;

    return [ split(/\s+/, $content) ];
}
3484
# SMM is not off by default anymore since pve-qemu commit
# 277d33454f77ec1d1e0bc04e37621e4dd2424b67, but smm=off seems to be required
# when SeaBIOS is combined with a serial display.
#
# Returns a true value if the VM uses SeaBIOS ('bios' unset or 'seabios') and
# the display type is a serial port or 'none'.
my sub should_disable_smm {
    my ($conf, $vga) = @_;

    my $bios = $conf->{bios};
    my $uses_seabios = !defined($bios) || $bios eq 'seabios';

    return $uses_seabios
        && $vga->{type}
        && $vga->{type} =~ m/^(serial\d+|none)$/;
}
3493
3494 sub config_to_command {
3495 my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu,
3496 $pbs_backing) = @_;
3497
3498 my $cmd = [];
3499 my ($globalFlags, $machineFlags, $rtcFlags) = ([], [], []);
3500 my $devices = [];
3501 my $bridges = {};
3502 my $ostype = $conf->{ostype};
3503 my $winversion = windows_version($ostype);
3504 my $kvm = $conf->{kvm};
3505 my $nodename = nodename();
3506
3507 my $arch = get_vm_arch($conf);
3508 my $kvm_binary = get_command_for_arch($arch);
3509 my $kvmver = kvm_user_version($kvm_binary);
3510
3511 if (!$kvmver || $kvmver !~ m/^(\d+)\.(\d+)/ || $1 < 3) {
3512 $kvmver //= "undefined";
3513 die "Detected old QEMU binary ('$kvmver', at least 3.0 is required)\n";
3514 }
3515
3516 my $add_pve_version = min_version($kvmver, 4, 1);
3517
3518 my $machine_type = get_vm_machine($conf, $forcemachine, $arch, $add_pve_version);
3519 my $machine_version = extract_version($machine_type, $kvmver);
3520 $kvm //= 1 if is_native($arch);
3521
3522 $machine_version =~ m/(\d+)\.(\d+)/;
3523 my ($machine_major, $machine_minor) = ($1, $2);
3524
3525 if ($kvmver =~ m/^\d+\.\d+\.(\d+)/ && $1 >= 90) {
3526 warn "warning: Installed QEMU version ($kvmver) is a release candidate, ignoring version checks\n";
3527 } elsif (!min_version($kvmver, $machine_major, $machine_minor)) {
3528 die "Installed QEMU version '$kvmver' is too old to run machine type '$machine_type',"
3529 ." please upgrade node '$nodename'\n"
3530 } elsif (!PVE::QemuServer::Machine::can_run_pve_machine_version($machine_version, $kvmver)) {
3531 my $max_pve_version = PVE::QemuServer::Machine::get_pve_version($machine_version);
3532 die "Installed qemu-server (max feature level for $machine_major.$machine_minor is"
3533 ." pve$max_pve_version) is too old to run machine type '$machine_type', please upgrade"
3534 ." node '$nodename'\n";
3535 }
3536
3537 # if a specific +pve version is required for a feature, use $version_guard
3538 # instead of min_version to allow machines to be run with the minimum
3539 # required version
3540 my $required_pve_version = 0;
3541 my $version_guard = sub {
3542 my ($major, $minor, $pve) = @_;
3543 return 0 if !min_version($machine_version, $major, $minor, $pve);
3544 my $max_pve = PVE::QemuServer::Machine::get_pve_version("$major.$minor");
3545 return 1 if min_version($machine_version, $major, $minor, $max_pve+1);
3546 $required_pve_version = $pve if $pve && $pve > $required_pve_version;
3547 return 1;
3548 };
3549
3550 if ($kvm && !defined kvm_version()) {
3551 die "KVM virtualisation configured, but not available. Either disable in VM configuration"
3552 ." or enable in BIOS.\n";
3553 }
3554
3555 my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
3556 my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
3557 my $use_old_bios_files = undef;
3558 ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);
3559
3560 if ($conf->{affinity}) {
3561 push @$cmd, "/usr/bin/taskset";
3562 push @$cmd, "--cpu-list";
3563 push @$cmd, "--all-tasks";
3564 push @$cmd, $conf->{affinity};
3565 }
3566
3567 push @$cmd, $kvm_binary;
3568
3569 push @$cmd, '-id', $vmid;
3570
3571 my $vmname = $conf->{name} || "vm$vmid";
3572
3573 push @$cmd, '-name', "$vmname,debug-threads=on";
3574
3575 push @$cmd, '-no-shutdown';
3576
3577 my $use_virtio = 0;
3578
3579 my $qmpsocket = PVE::QemuServer::Helpers::qmp_socket($vmid);
3580 push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server=on,wait=off";
3581 push @$cmd, '-mon', "chardev=qmp,mode=control";
3582
3583 if (min_version($machine_version, 2, 12)) {
3584 push @$cmd, '-chardev', "socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5";
3585 push @$cmd, '-mon', "chardev=qmp-event,mode=control";
3586 }
3587
3588 push @$cmd, '-pidfile' , PVE::QemuServer::Helpers::pidfile_name($vmid);
3589
3590 push @$cmd, '-daemonize';
3591
3592 if ($conf->{smbios1}) {
3593 my $smbios_conf = parse_smbios1($conf->{smbios1});
3594 if ($smbios_conf->{base64}) {
3595 # Do not pass base64 flag to qemu
3596 delete $smbios_conf->{base64};
3597 my $smbios_string = "";
3598 foreach my $key (keys %$smbios_conf) {
3599 my $value;
3600 if ($key eq "uuid") {
3601 $value = $smbios_conf->{uuid}
3602 } else {
3603 $value = decode_base64($smbios_conf->{$key});
3604 }
3605 # qemu accepts any binary data, only commas need escaping by double comma
3606 $value =~ s/,/,,/g;
3607 $smbios_string .= "," . $key . "=" . $value if $value;
3608 }
3609 push @$cmd, '-smbios', "type=1" . $smbios_string;
3610 } else {
3611 push @$cmd, '-smbios', "type=1,$conf->{smbios1}";
3612 }
3613 }
3614
3615 if ($conf->{bios} && $conf->{bios} eq 'ovmf') {
3616 my $d;
3617 if (my $efidisk = $conf->{efidisk0}) {
3618 $d = parse_drive('efidisk0', $efidisk);
3619 }
3620
3621 my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch, $d, $q35);
3622 die "uefi base image '$ovmf_code' not found\n" if ! -f $ovmf_code;
3623
3624 my ($path, $format);
3625 my $read_only_str = '';
3626 if ($d) {
3627 my ($storeid, $volname) = PVE::Storage::parse_volume_id($d->{file}, 1);
3628 $format = $d->{format};
3629 if ($storeid) {
3630 $path = PVE::Storage::path($storecfg, $d->{file});
3631 if (!defined($format)) {
3632 my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
3633 $format = qemu_img_format($scfg, $volname);
3634 }
3635 } else {
3636 $path = $d->{file};
3637 die "efidisk format must be specified\n"
3638 if !defined($format);
3639 }
3640
3641 $read_only_str = ',readonly=on' if drive_is_read_only($conf, $d);
3642 } else {
3643 log_warn("no efidisk configured! Using temporary efivars disk.");
3644 $path = "/tmp/$vmid-ovmf.fd";
3645 PVE::Tools::file_copy($ovmf_vars, $path, -s $ovmf_vars);
3646 $format = 'raw';
3647 }
3648
3649 my $size_str = "";
3650
3651 if ($format eq 'raw' && $version_guard->(4, 1, 2)) {
3652 $size_str = ",size=" . (-s $ovmf_vars);
3653 }
3654
3655 # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
3656 my $cache = "";
3657 if ($path =~ m/^rbd:/) {
3658 $cache = ',cache=writeback';
3659 $path .= ':rbd_cache_policy=writeback'; # avoid write-around, we *need* to cache writes too
3660 }
3661
3662 push @$cmd, '-drive', "if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code";
3663 push @$cmd, '-drive', "if=pflash,unit=1$cache,format=$format,id=drive-efidisk0$size_str,file=${path}${read_only_str}";
3664 }
3665
3666 if ($q35) { # tell QEMU to load q35 config early
3667 # we use different pcie-port hardware for qemu >= 4.0 for passthrough
3668 if (min_version($machine_version, 4, 0)) {
3669 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35-4.0.cfg';
3670 } else {
3671 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35.cfg';
3672 }
3673 }
3674
3675 if (defined(my $fixups = qemu_created_version_fixups($conf, $forcemachine, $kvmver))) {
3676 push @$cmd, $fixups->@*;
3677 }
3678
3679 if ($conf->{vmgenid}) {
3680 push @$devices, '-device', 'vmgenid,guid='.$conf->{vmgenid};
3681 }
3682
3683 # add usb controllers
3684 my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers(
3685 $conf, $bridges, $arch, $machine_type, $usbdesc->{format}, $MAX_USB_DEVICES, $machine_version);
3686 push @$devices, @usbcontrollers if @usbcontrollers;
3687 my $vga = parse_vga($conf->{vga});
3688
3689 my $qxlnum = vga_conf_has_spice($conf->{vga});
3690 $vga->{type} = 'qxl' if $qxlnum;
3691
3692 if (!$vga->{type}) {
3693 if ($arch eq 'aarch64') {
3694 $vga->{type} = 'virtio';
3695 } elsif (min_version($machine_version, 2, 9)) {
3696 $vga->{type} = (!$winversion || $winversion >= 6) ? 'std' : 'cirrus';
3697 } else {
3698 $vga->{type} = ($winversion >= 6) ? 'std' : 'cirrus';
3699 }
3700 }
3701
3702 # enable absolute mouse coordinates (needed by vnc)
3703 my $tablet = $conf->{tablet};
3704 if (!defined($tablet)) {
3705 $tablet = $defaults->{tablet};
3706 $tablet = 0 if $qxlnum; # disable for spice because it is not needed
3707 $tablet = 0 if $vga->{type} =~ m/^serial\d+$/; # disable if we use serial terminal (no vga card)
3708 }
3709
3710 if ($tablet) {
3711 push @$devices, '-device', print_tabletdevice_full($conf, $arch) if $tablet;
3712 my $kbd = print_keyboarddevice_full($conf, $arch);
3713 push @$devices, '-device', $kbd if defined($kbd);
3714 }
3715
3716 my $bootorder = device_bootorder($conf);
3717
3718 # host pci device passthrough
3719 my ($kvm_off, $gpu_passthrough, $legacy_igd) = PVE::QemuServer::PCI::print_hostpci_devices(
3720 $vmid, $conf, $devices, $vga, $winversion, $q35, $bridges, $arch, $machine_type, $bootorder);
3721
3722 # usb devices
3723 my $usb_dev_features = {};
3724 $usb_dev_features->{spice_usb3} = 1 if min_version($machine_version, 4, 0);
3725
3726 my @usbdevices = PVE::QemuServer::USB::get_usb_devices(
3727 $conf, $usbdesc->{format}, $MAX_USB_DEVICES, $usb_dev_features, $bootorder, $machine_version);
3728 push @$devices, @usbdevices if @usbdevices;
3729
3730 # serial devices
3731 for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
3732 my $path = $conf->{"serial$i"} or next;
3733 if ($path eq 'socket') {
3734 my $socket = "/var/run/qemu-server/${vmid}.serial$i";
3735 push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server=on,wait=off";
3736 # On aarch64, serial0 is the UART device. Qemu only allows
3737 # connecting UART devices via the '-serial' command line, as
3738 # the device has a fixed slot on the hardware...
3739 if ($arch eq 'aarch64' && $i == 0) {
3740 push @$devices, '-serial', "chardev:serial$i";
3741 } else {
3742 push @$devices, '-device', "isa-serial,chardev=serial$i";
3743 }
3744 } else {
3745 die "no such serial device\n" if ! -c $path;
3746 push @$devices, '-chardev', "tty,id=serial$i,path=$path";
3747 push @$devices, '-device', "isa-serial,chardev=serial$i";
3748 }
3749 }
3750
3751 # parallel devices
3752 for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
3753 if (my $path = $conf->{"parallel$i"}) {
3754 die "no such parallel device\n" if ! -c $path;
3755 my $devtype = $path =~ m!^/dev/usb/lp! ? 'tty' : 'parport';
3756 push @$devices, '-chardev', "$devtype,id=parallel$i,path=$path";
3757 push @$devices, '-device', "isa-parallel,chardev=parallel$i";
3758 }
3759 }
3760
3761 if (min_version($machine_version, 4, 0) && (my $audio = conf_has_audio($conf))) {
3762 my $audiopciaddr = print_pci_addr("audio0", $bridges, $arch, $machine_type);
3763 my $audio_devs = audio_devs($audio, $audiopciaddr, $machine_version);
3764 push @$devices, @$audio_devs;
3765 }
3766
3767 add_tpm_device($vmid, $devices, $conf);
3768
3769 my $sockets = 1;
3770 $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused
3771 $sockets = $conf->{sockets} if $conf->{sockets};
3772
3773 my $cores = $conf->{cores} || 1;
3774
3775 my $maxcpus = $sockets * $cores;
3776
3777 my $vcpus = $conf->{vcpus} ? $conf->{vcpus} : $maxcpus;
3778
3779 my $allowed_vcpus = $cpuinfo->{cpus};
3780
3781 die "MAX $allowed_vcpus vcpus allowed per VM on this node\n" if ($allowed_vcpus < $maxcpus);
3782
3783 if ($hotplug_features->{cpu} && min_version($machine_version, 2, 7)) {
3784 push @$cmd, '-smp', "1,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3785 for (my $i = 2; $i <= $vcpus; $i++) {
3786 my $cpustr = print_cpu_device($conf,$i);
3787 push @$cmd, '-device', $cpustr;
3788 }
3789
3790 } else {
3791
3792 push @$cmd, '-smp', "$vcpus,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3793 }
3794 push @$cmd, '-nodefaults';
3795
3796 push @$cmd, '-boot', "menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg";
3797
3798 push @$cmd, '-no-acpi' if defined($conf->{acpi}) && $conf->{acpi} == 0;
3799
3800 push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;
3801
3802 if ($vga->{type} && $vga->{type} !~ m/^serial\d+$/ && $vga->{type} ne 'none'){
3803 push @$devices, '-device', print_vga_device(
3804 $conf, $vga, $arch, $machine_version, $machine_type, undef, $qxlnum, $bridges);
3805
3806 push @$cmd, '-display', 'egl-headless,gl=core' if $vga->{type} eq 'virtio-gl'; # VIRGL
3807
3808 my $socket = PVE::QemuServer::Helpers::vnc_socket($vmid);
3809 push @$cmd, '-vnc', "unix:$socket,password=on";
3810 } else {
3811 push @$cmd, '-vga', 'none' if $vga->{type} eq 'none';
3812 push @$cmd, '-nographic';
3813 }
3814
3815 # time drift fix
3816 my $tdf = defined($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};
3817 my $useLocaltime = $conf->{localtime};
3818
3819 if ($winversion >= 5) { # windows
3820 $useLocaltime = 1 if !defined($conf->{localtime});
3821
3822 # use time drift fix when acpi is enabled
3823 if (!(defined($conf->{acpi}) && $conf->{acpi} == 0)) {
3824 $tdf = 1 if !defined($conf->{tdf});
3825 }
3826 }
3827
3828 if ($winversion >= 6) {
3829 push @$globalFlags, 'kvm-pit.lost_tick_policy=discard';
3830 push @$cmd, '-no-hpet';
3831 }
3832
3833 push @$rtcFlags, 'driftfix=slew' if $tdf;
3834
3835 if ($conf->{startdate} && $conf->{startdate} ne 'now') {
3836 push @$rtcFlags, "base=$conf->{startdate}";
3837 } elsif ($useLocaltime) {
3838 push @$rtcFlags, 'base=localtime';
3839 }
3840
3841 if ($forcecpu) {
3842 push @$cmd, '-cpu', $forcecpu;
3843 } else {
3844 push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
3845 }
3846
3847 PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd);
3848
3849 push @$cmd, '-S' if $conf->{freeze};
3850
3851 push @$cmd, '-k', $conf->{keyboard} if defined($conf->{keyboard});
3852
3853 my $guest_agent = parse_guest_agent($conf);
3854
3855 if ($guest_agent->{enabled}) {
3856 my $qgasocket = PVE::QemuServer::Helpers::qmp_socket($vmid, 1);
3857 push @$devices, '-chardev', "socket,path=$qgasocket,server=on,wait=off,id=qga0";
3858
3859 if (!$guest_agent->{type} || $guest_agent->{type} eq 'virtio') {
3860 my $pciaddr = print_pci_addr("qga0", $bridges, $arch, $machine_type);
3861 push @$devices, '-device', "virtio-serial,id=qga0$pciaddr";
3862 push @$devices, '-device', 'virtserialport,chardev=qga0,name=org.qemu.guest_agent.0';
3863 } elsif ($guest_agent->{type} eq 'isa') {
3864 push @$devices, '-device', "isa-serial,chardev=qga0";
3865 }
3866 }
3867
3868 my $rng = $conf->{rng0} ? parse_rng($conf->{rng0}) : undef;
3869 if ($rng && $version_guard->(4, 1, 2)) {
3870 check_rng_source($rng->{source});
3871
3872 my $max_bytes = $rng->{max_bytes} // $rng_fmt->{max_bytes}->{default};
3873 my $period = $rng->{period} // $rng_fmt->{period}->{default};
3874 my $limiter_str = "";
3875 if ($max_bytes) {
3876 $limiter_str = ",max-bytes=$max_bytes,period=$period";
3877 }
3878
3879 my $rng_addr = print_pci_addr("rng0", $bridges, $arch, $machine_type);
3880 push @$devices, '-object', "rng-random,filename=$rng->{source},id=rng0";
3881 push @$devices, '-device', "virtio-rng-pci,rng=rng0$limiter_str$rng_addr";
3882 }
3883
3884 my $spice_port;
3885
3886 if ($qxlnum || $vga->{type} =~ /^virtio/) {
3887 if ($qxlnum > 1) {
3888 if ($winversion){
3889 for (my $i = 1; $i < $qxlnum; $i++){
3890 push @$devices, '-device', print_vga_device(
3891 $conf, $vga, $arch, $machine_version, $machine_type, $i, $qxlnum, $bridges);
3892 }
3893 } else {
3894 # assume other OS works like Linux
3895 my ($ram, $vram) = ("134217728", "67108864");
3896 if ($vga->{memory}) {
3897 $ram = PVE::Tools::convert_size($qxlnum*4*$vga->{memory}, 'mb' => 'b');
3898 $vram = PVE::Tools::convert_size($qxlnum*2*$vga->{memory}, 'mb' => 'b');
3899 }
3900 push @$cmd, '-global', "qxl-vga.ram_size=$ram";
3901 push @$cmd, '-global', "qxl-vga.vram_size=$vram";
3902 }
3903 }
3904
3905 my $pciaddr = print_pci_addr("spice", $bridges, $arch, $machine_type);
3906
3907 my $pfamily = PVE::Tools::get_host_address_family($nodename);
3908 my @nodeaddrs = PVE::Tools::getaddrinfo_all('localhost', family => $pfamily);
3909 die "failed to get an ip address of type $pfamily for 'localhost'\n" if !@nodeaddrs;
3910
3911 push @$devices, '-device', "virtio-serial,id=spice$pciaddr";
3912 push @$devices, '-chardev', "spicevmc,id=vdagent,name=vdagent";
3913 push @$devices, '-device', "virtserialport,chardev=vdagent,name=com.redhat.spice.0";
3914
3915 my $localhost = PVE::Network::addr_to_ip($nodeaddrs[0]->{addr});
3916 $spice_port = PVE::Tools::next_spice_port($pfamily, $localhost);
3917
3918 my $spice_enhancement_str = $conf->{spice_enhancements} // '';
3919 my $spice_enhancement = parse_property_string($spice_enhancements_fmt, $spice_enhancement_str);
3920 if ($spice_enhancement->{foldersharing}) {
3921 push @$devices, '-chardev', "spiceport,id=foldershare,name=org.spice-space.webdav.0";
3922 push @$devices, '-device', "virtserialport,chardev=foldershare,name=org.spice-space.webdav.0";
3923 }
3924
3925 my $spice_opts = "tls-port=${spice_port},addr=$localhost,tls-ciphers=HIGH,seamless-migration=on";
3926 $spice_opts .= ",streaming-video=$spice_enhancement->{videostreaming}"
3927 if $spice_enhancement->{videostreaming};
3928
3929 push @$devices, '-spice', "$spice_opts";
3930 }
3931
3932 # enable balloon by default, unless explicitly disabled
3933 if (!defined($conf->{balloon}) || $conf->{balloon}) {
3934 my $pciaddr = print_pci_addr("balloon0", $bridges, $arch, $machine_type);
3935 my $ballooncmd = "virtio-balloon-pci,id=balloon0$pciaddr";
3936 $ballooncmd .= ",free-page-reporting=on" if min_version($machine_version, 6, 2);
3937 push @$devices, '-device', $ballooncmd;
3938 }
3939
3940 if ($conf->{watchdog}) {
3941 my $wdopts = parse_watchdog($conf->{watchdog});
3942 my $pciaddr = print_pci_addr("watchdog", $bridges, $arch, $machine_type);
3943 my $watchdog = $wdopts->{model} || 'i6300esb';
3944 push @$devices, '-device', "$watchdog$pciaddr";
3945 push @$devices, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
3946 }
3947
3948 my $vollist = [];
3949 my $scsicontroller = {};
3950 my $ahcicontroller = {};
3951 my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : $defaults->{scsihw};
3952
3953 # Add iscsi initiator name if available
3954 if (my $initiator = get_initiator_name()) {
3955 push @$devices, '-iscsi', "initiator-name=$initiator";
3956 }
3957
3958 PVE::QemuConfig->foreach_volume($conf, sub {
3959 my ($ds, $drive) = @_;
3960
3961 if (PVE::Storage::parse_volume_id($drive->{file}, 1)) {
3962 check_volume_storage_type($storecfg, $drive->{file});
3963 push @$vollist, $drive->{file};
3964 }
3965
3966 # ignore efidisk here, already added in bios/fw handling code above
3967 return if $drive->{interface} eq 'efidisk';
3968 # similar for TPM
3969 return if $drive->{interface} eq 'tpmstate';
3970
3971 $use_virtio = 1 if $ds =~ m/^virtio/;
3972
3973 $drive->{bootindex} = $bootorder->{$ds} if $bootorder->{$ds};
3974
3975 if ($drive->{interface} eq 'virtio'){
3976 push @$cmd, '-object', "iothread,id=iothread-$ds" if $drive->{iothread};
3977 }
3978
3979 if ($drive->{interface} eq 'scsi') {
3980
3981 my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
3982
3983 die "scsi$drive->{index}: machine version 4.1~pve2 or higher is required to use more than 14 SCSI disks\n"
3984 if $drive->{index} > 13 && !&$version_guard(4, 1, 2);
3985
3986 my $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges, $arch, $machine_type);
3987 my $scsihw_type = $scsihw =~ m/^virtio-scsi-single/ ? "virtio-scsi-pci" : $scsihw;
3988
3989 my $iothread = '';
3990 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{iothread}){
3991 $iothread .= ",iothread=iothread-$controller_prefix$controller";
3992 push @$cmd, '-object', "iothread,id=iothread-$controller_prefix$controller";
3993 } elsif ($drive->{iothread}) {
3994 log_warn(
3995 "iothread is only valid with virtio disk or virtio-scsi-single controller, ignoring\n"
3996 );
3997 }
3998
3999 my $queues = '';
4000 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{queues}){
4001 $queues = ",num_queues=$drive->{queues}";
4002 }
4003
4004 push @$devices, '-device', "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues"
4005 if !$scsicontroller->{$controller};
4006 $scsicontroller->{$controller}=1;
4007 }
4008
4009 if ($drive->{interface} eq 'sata') {
4010 my $controller = int($drive->{index} / $PVE::QemuServer::Drive::MAX_SATA_DISKS);
4011 my $pciaddr = print_pci_addr("ahci$controller", $bridges, $arch, $machine_type);
4012 push @$devices, '-device', "ahci,id=ahci$controller,multifunction=on$pciaddr"
4013 if !$ahcicontroller->{$controller};
4014 $ahcicontroller->{$controller}=1;
4015 }
4016
4017 my $pbs_conf = $pbs_backing->{$ds};
4018 my $pbs_name = undef;
4019 if ($pbs_conf) {
4020 $pbs_name = "drive-$ds-pbs";
4021 push @$devices, '-blockdev', print_pbs_blockdev($pbs_conf, $pbs_name);
4022 }
4023
4024 my $drive_cmd = print_drive_commandline_full(
4025 $storecfg, $vmid, $drive, $pbs_name, min_version($kvmver, 6, 0));
4026
4027 # extra protection for templates, but SATA and IDE don't support it..
4028 $drive_cmd .= ',readonly=on' if drive_is_read_only($conf, $drive);
4029
4030 push @$devices, '-drive',$drive_cmd;
4031 push @$devices, '-device', print_drivedevice_full(
4032 $storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type);
4033 });
4034
4035 for (my $i = 0; $i < $MAX_NETS; $i++) {
4036 my $netname = "net$i";
4037
4038 next if !$conf->{$netname};
4039 my $d = parse_net($conf->{$netname});
4040 next if !$d;
4041
4042 $use_virtio = 1 if $d->{model} eq 'virtio';
4043
4044 $d->{bootindex} = $bootorder->{$netname} if $bootorder->{$netname};
4045
4046 my $netdevfull = print_netdev_full($vmid, $conf, $arch, $d, $netname);
4047 push @$devices, '-netdev', $netdevfull;
4048
4049 my $netdevicefull = print_netdevice_full(
4050 $vmid, $conf, $d, $netname, $bridges, $use_old_bios_files, $arch, $machine_type);
4051
4052 push @$devices, '-device', $netdevicefull;
4053 }
4054
4055 if ($conf->{ivshmem}) {
4056 my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
4057
4058 my $bus;
4059 if ($q35) {
4060 $bus = print_pcie_addr("ivshmem");
4061 } else {
4062 $bus = print_pci_addr("ivshmem", $bridges, $arch, $machine_type);
4063 }
4064
4065 my $ivshmem_name = $ivshmem->{name} // $vmid;
4066 my $path = '/dev/shm/pve-shm-' . $ivshmem_name;
4067
4068 push @$devices, '-device', "ivshmem-plain,memdev=ivshmem$bus,";
4069 push @$devices, '-object', "memory-backend-file,id=ivshmem,share=on,mem-path=$path"
4070 .",size=$ivshmem->{size}M";
4071 }
4072
4073 # pci.4 is nested in pci.1
4074 $bridges->{1} = 1 if $bridges->{4};
4075
4076 if (!$q35) { # add pci bridges
4077 if (min_version($machine_version, 2, 3)) {
4078 $bridges->{1} = 1;
4079 $bridges->{2} = 1;
4080 }
4081 $bridges->{3} = 1 if $scsihw =~ m/^virtio-scsi-single/;
4082 }
4083
4084 for my $k (sort {$b cmp $a} keys %$bridges) {
4085 next if $q35 && $k < 4; # q35.cfg already includes bridges up to 3
4086
4087 my $k_name = $k;
4088 if ($k == 2 && $legacy_igd) {
4089 $k_name = "$k-igd";
4090 }
4091 my $pciaddr = print_pci_addr("pci.$k_name", undef, $arch, $machine_type);
4092 my $devstr = "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr";
4093
4094 if ($q35) { # add after -readconfig pve-q35.cfg
4095 splice @$devices, 2, 0, '-device', $devstr;
4096 } else {
4097 unshift @$devices, '-device', $devstr if $k > 0;
4098 }
4099 }
4100
4101 if (!$kvm) {
4102 push @$machineFlags, 'accel=tcg';
4103 }
4104
4105 push @$machineFlags, 'smm=off' if should_disable_smm($conf, $vga);
4106
4107 my $machine_type_min = $machine_type;
4108 if ($add_pve_version) {
4109 $machine_type_min =~ s/\+pve\d+$//;
4110 $machine_type_min .= "+pve$required_pve_version";
4111 }
4112 push @$machineFlags, "type=${machine_type_min}";
4113
4114 push @$cmd, @$devices;
4115 push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
4116 push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
4117 push @$cmd, '-global', join(',', @$globalFlags) if scalar(@$globalFlags);
4118
4119 if (my $vmstate = $conf->{vmstate}) {
4120 my $statepath = PVE::Storage::path($storecfg, $vmstate);
4121 push @$vollist, $vmstate;
4122 push @$cmd, '-loadstate', $statepath;
4123 print "activating and using '$vmstate' as vmstate\n";
4124 }
4125
4126 if (PVE::QemuConfig->is_template($conf)) {
4127 # needed to workaround base volumes being read-only
4128 push @$cmd, '-snapshot';
4129 }
4130
4131 # add custom args
4132 if ($conf->{args}) {
4133 my $aa = PVE::Tools::split_args($conf->{args});
4134 push @$cmd, @$aa;
4135 }
4136
4137 return wantarray ? ($cmd, $vollist, $spice_port) : $cmd;
4138 }
4139
# Verify that $source can back a VirtIO RNG device.
#
# Dies if $source does not exist, or if $source is '/dev/hwrng' while the first line of the
# sysfs 'rng_current' file reads 'none'.
sub check_rng_source {
    my ($source) = @_;

    # mostly relevant for /dev/hwrng, but doesn't hurt to check others too
    if (!-e $source) {
        die "cannot create VirtIO RNG device: source file '$source' doesn't exist\n";
    }

    my $rng_current = '/sys/devices/virtual/misc/hw_random/rng_current';
    my $is_hwrng = $source eq '/dev/hwrng';

    # the sysfs file is only consulted for the host hardware RNG
    if ($is_hwrng && file_read_firstline($rng_current) eq 'none') {
        # Needs to abort, otherwise QEMU crashes on first rng access. Note that rng_current cannot
        # be changed to 'none' manually, so once the VM is past this point, it's no longer an issue.
        my $msg = "Cannot start VM with passed-through RNG device: '/dev/hwrng' exists, but"
            ." '$rng_current' is set to 'none'. Ensure that a compatible hardware-RNG is attached"
            ." to the host.\n";
        die $msg;
    }
}
4156
# Return the SPICE port of the running VM as reported by 'query-spice'.
#
# The 'tls-port' entry is preferred over the plain 'port' entry; dies if neither is set.
sub spice_port {
    my ($vmid) = @_;

    my $info = mon_cmd($vmid, 'query-spice');

    for my $key ('tls-port', 'port') {
        return $info->{$key} if $info->{$key};
    }

    die "no spice port\n";
}
4164
# Collect the ids of the devices currently present in the running VM.
#
# Queries the monitor ('query-pci', 'query-block', 'query-mice' and 'qom-list' on
# /machine/peripheral) and returns a hash reference keyed by device id:
#  - every PCI device with a non-empty 'qdev_id', including devices behind PCI bridges; for a
#    bridge the value is 1 plus the number of devices directly behind it, otherwise 1
#  - every block device named 'drive-<id>', stored as '<id>'
#  - 'tablet' if a mouse named 'QEMU HID Tablet' is reported
#  - every peripheral named 'usbN' or 'usbredirdevN'
sub vm_devices_list {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, 'query-pci');
    my $devices_to_check = [];
    my $devices = {};
    foreach my $pcibus (@$res) {
        push @$devices_to_check, @{$pcibus->{devices}};
    }

    # walk the PCI topology level by level, descending into bridges
    while (@$devices_to_check) {
        my $to_check = [];
        for my $d (@$devices_to_check) {
            $devices->{$d->{'qdev_id'}} = 1 if $d->{'qdev_id'};

            # 'devices' is an optional member of the bridge info; dereferencing a missing list
            # would die under strict refs, so treat such a bridge as having nothing behind it
            my $bridge = $d->{'pci_bridge'};
            next if !$bridge || !$bridge->{devices};

            $devices->{$d->{'qdev_id'}} += scalar(@{$bridge->{devices}});
            push @$to_check, @{$bridge->{devices}};
        }
        $devices_to_check = $to_check;
    }

    my $resblock = mon_cmd($vmid, 'query-block');
    foreach my $block (@$resblock) {
        if ($block->{device} =~ m/^drive-(\S+)/) {
            $devices->{$1} = 1;
        }
    }

    my $resmice = mon_cmd($vmid, 'query-mice');
    foreach my $mice (@$resmice) {
        if ($mice->{name} eq 'QEMU HID Tablet') {
            $devices->{tablet} = 1;
            last;
        }
    }

    # for usb devices there is no query-usb
    # but we can iterate over the entries in
    # qom-list path=/machine/peripheral
    my $resperipheral = mon_cmd($vmid, 'qom-list', path => '/machine/peripheral');
    foreach my $per (@$resperipheral) {
        if ($per->{name} =~ m/^usb(?:redirdev)?\d+$/) {
            $devices->{$per->{name}} = 1;
        }
    }

    return $devices;
}
4214
# Hotplug the device $deviceid into the running VM.
#
# $device is the parsed configuration of the device; it is not read for the 'tablet',
# 'keyboard', 'usbredirdevN' and 'pci.N' ids. The kind of device is derived from $deviceid.
#
# Returns 1 once the device is present (also when it was already plugged), returns nothing
# when qemu_netdevadd reports failure for a network device, and dies for ids that cannot be
# hotplugged or when a monitor command or verification step fails.
sub vm_deviceplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    my $present = vm_devices_list($vmid);
    return 1 if defined($present->{$deviceid});

    # add PCI bridge if we need it for the device
    qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid, $arch, $machine_type);

    # remove the already added drive backend again, then re-raise the original error
    my $rollback_drive = sub {
        my ($err) = @_;
        eval { qemu_drivedel($vmid, $deviceid); };
        warn $@ if $@;
        die $err;
    };

    if ($deviceid eq 'tablet') {
        qemu_deviceadd($vmid, print_tabletdevice_full($conf, $arch));
    } elsif ($deviceid eq 'keyboard') {
        qemu_deviceadd($vmid, print_keyboarddevice_full($conf, $arch));
    } elsif ($deviceid =~ m/^usbredirdev(\d+)$/) {
        my $id = $1;
        qemu_spice_usbredir_chardev_add($vmid, "usbredirchardev$id");
        qemu_deviceadd($vmid, PVE::QemuServer::USB::print_spice_usbdevice($id, "xhci", $id + 1));
    } elsif ($deviceid =~ m/^usb(\d+)$/) {
        my $port = $1 + 1;
        qemu_deviceadd(
            $vmid,
            PVE::QemuServer::USB::print_usbdevice_full($conf, $deviceid, $device, {}, $port),
        );
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        qemu_iothread_add($vmid, $deviceid, $device);

        qemu_driveadd($storecfg, $vmid, $device);
        my $devstr = print_drivedevice_full(
            $storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);

        qemu_deviceadd($vmid, $devstr);
        eval { qemu_deviceaddverify($vmid, $deviceid); };
        if (my $err = $@) {
            $rollback_drive->($err);
        }
    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        my $scsihw = $conf->{scsihw} // "lsi";
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $scsihw_type = $scsihw eq 'virtio-scsi-single' ? "virtio-scsi-pci" : $scsihw;

        my $devstr = "$scsihw_type,id=$deviceid$pciaddr";

        # iothread and queue settings only apply to per-disk virtio-scsi controllers
        if ($deviceid =~ m/^virtioscsi(\d+)$/) {
            if ($device->{iothread}) {
                qemu_iothread_add($vmid, $deviceid, $device);
                $devstr .= ",iothread=iothread-$deviceid";
            }
            if ($device->{queues}) {
                $devstr .= ",num_queues=$device->{queues}";
            }
        }

        qemu_deviceadd($vmid, $devstr);
        qemu_deviceaddverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        qemu_findorcreatescsihw($storecfg, $conf, $vmid, $device, $arch, $machine_type);
        qemu_driveadd($storecfg, $vmid, $device);

        my $devstr = print_drivedevice_full(
            $storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);
        eval { qemu_deviceadd($vmid, $devstr); };
        if (my $err = $@) {
            $rollback_drive->($err);
        }
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        return if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid);

        # the network device is built for the PXE machine type, not the one passed in
        my $pxe_machine_type = PVE::QemuServer::Machine::qemu_machine_pxe($vmid, $conf);
        my ($use_old_bios_files, $net_machine_type) = qemu_use_old_bios_files($pxe_machine_type);

        my $netdevstr = print_netdevice_full(
            $vmid, $conf, $device, $deviceid, undef, $use_old_bios_files, $arch, $net_machine_type);
        qemu_deviceadd($vmid, $netdevstr);
        eval {
            qemu_deviceaddverify($vmid, $deviceid);
            qemu_set_link_status($vmid, $deviceid, !$device->{link_down});
        };
        if (my $err = $@) {
            # drop the network backend again before re-raising
            eval { qemu_netdevdel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif (!$q35 && $deviceid =~ m/^(pci\.)(\d+)$/) {
        my $bridgeid = $2;
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $devstr = "pci-bridge,id=pci.$bridgeid,chassis_nr=$bridgeid$pciaddr";

        qemu_deviceadd($vmid, $devstr);
        qemu_deviceaddverify($vmid, $deviceid);
    } else {
        die "can't hotplug device '$deviceid'\n";
    }

    return 1;
}
4310
4311 # fixme: this should raise exceptions on error!
# Hot-unplug the device $deviceid from the running VM.
#
# Returns 1 when the device is gone (also when it was not plugged in the first place). Dies if
# $deviceid is one of the configured boot disks, if the id cannot be unplugged, or if one of
# the removal steps fails.
sub vm_deviceunplug {
    my ($vmid, $conf, $deviceid) = @_;

    my $present = vm_devices_list($vmid);
    return 1 if !defined($present->{$deviceid});

    my $bootdisks = PVE::QemuServer::Drive::get_bootdisks($conf);
    for my $bootdisk (@$bootdisks) {
        die "can't unplug bootdisk '$deviceid'\n" if $bootdisk eq $deviceid;
    }

    # request removal and wait until the device has actually vanished
    my $remove_and_verify = sub {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
    };

    if ($deviceid eq 'tablet' || $deviceid eq 'keyboard' || $deviceid eq 'xhci') {
        # these are removed without verification
        qemu_devicedel($vmid, $deviceid);
    } elsif ($deviceid =~ m/^usbredirdev\d+$/ || $deviceid =~ m/^usb\d+$/) {
        $remove_and_verify->();
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        my $device = parse_drive($deviceid, $conf->{$deviceid});

        $remove_and_verify->();
        qemu_drivedel($vmid, $deviceid);
        qemu_iothread_del($vmid, $deviceid, $device);
    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        $remove_and_verify->();
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        my $device = parse_drive($deviceid, $conf->{$deviceid});

        $remove_and_verify->();
        qemu_drivedel($vmid, $deviceid);
        qemu_deletescsihw($conf, $vmid, $deviceid);

        # with virtio-scsi-single the iothread belongs to the per-disk controller
        if ($conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single')) {
            qemu_iothread_del($vmid, "virtioscsi$device->{index}", $device);
        }
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        $remove_and_verify->();
        qemu_netdevdel($vmid, $deviceid);
    } else {
        die "can't unplug device '$deviceid'\n";
    }

    return 1;
}
4359
# Add a 'spicevmc' character device of type 'usbredir' with the given id to the running VM.
sub qemu_spice_usbredir_chardev_add {
    my ($vmid, $id) = @_;

    my $backend = {
        type => 'spicevmc',
        data => { type => "usbredir" },
    };

    mon_cmd($vmid, "chardev-add", id => $id, backend => $backend);
}
4373
# Issue a 'device_add' monitor command built from a '-device' style option string.
#
# $devicefull has the form "<driver>,key=value,..."; it is split on '=' and ',' into the
# command's arguments, so neither keys nor values may contain those characters.
sub qemu_deviceadd {
    my ($vmid, $devicefull) = @_;

    # the leading driver name has no key of its own, so give it one before splitting
    my %options = split(/[=,]/, "driver=" . $devicefull);

    mon_cmd($vmid, "device_add", %options);
}
4382
# Issue a 'device_del' monitor command for $deviceid and return its result.
sub qemu_devicedel {
    my ($vmid, $deviceid) = @_;

    my $result = mon_cmd($vmid, "device_del", id => $deviceid);

    return $result;
}
4388
# Create the 'iothread-<deviceid>' object for a device that has iothread enabled, unless the
# running VM already has an iothread with that id.
sub qemu_iothread_add {
    my ($vmid, $deviceid, $device) = @_;

    return if !$device->{iothread};

    my $object_id = "iothread-$deviceid";
    my $existing = vm_iothreads_list($vmid);

    qemu_objectadd($vmid, $object_id, "iothread") if !$existing->{$object_id};
}
4397
# Remove the 'iothread-<deviceid>' object of a device that has iothread enabled, if the
# running VM currently has an iothread with that id.
sub qemu_iothread_del {
    my ($vmid, $deviceid, $device) = @_;

    return if !$device->{iothread};

    my $object_id = "iothread-$deviceid";
    my $existing = vm_iothreads_list($vmid);

    qemu_objectdel($vmid, $object_id) if $existing->{$object_id};
}
4406
# Create an object of QOM type $qomtype with id $objectid via 'object-add'. Returns 1.
sub qemu_objectadd {
    my ($vmid, $objectid, $qomtype) = @_;

    my @args = (id => $objectid, "qom-type" => $qomtype);
    mon_cmd($vmid, "object-add", @args);

    return 1;
}
4414
# Delete the object with id $objectid via 'object-del'. Returns 1.
sub qemu_objectdel {
    my ($vmid, $objectid) = @_;

    my @args = (id => $objectid);
    mon_cmd($vmid, "object-del", @args);

    return 1;
}
4422
# Add the block backend for $device to the running VM using the HMP 'drive_add' command.
#
# Returns 1 on success; dies with the monitor output if it does not contain "OK".
sub qemu_driveadd {
    my ($storecfg, $vmid, $device) = @_;

    my $kvmver = get_running_qemu_version($vmid);
    my $io_uring = min_version($kvmver, 6, 0);

    my $drive = print_drive_commandline_full($storecfg, $vmid, $device, undef, $io_uring);
    # the drive string is embedded in a double-quoted HMP argument, so double the backslashes
    $drive =~ s/\\/\\\\/g;

    my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_add auto \"$drive\"");

    # If the command succeeds qemu prints: "OK"
    die "adding drive failed: $output\n" if $output !~ m/OK/s;

    return 1;
}
4437
# Remove the block backend 'drive-<deviceid>' from the running VM using HMP 'drive_del'.
#
# Returns 1 if the monitor printed nothing or reported the device as not found; dies with the
# monitor output otherwise.
sub qemu_drivedel {
    my ($vmid, $deviceid) = @_;

    my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_del drive-$deviceid");
    $output =~ s/^\s+//;

    # NB: device not found errors mean the drive was auto-deleted and we ignore the error
    my $deleted = $output eq "" || $output =~ m/Device \'.*?\' not found/s;

    die "deleting drive $deviceid failed : $output\n" if !$deleted;

    return 1;
}
4451
# Wait for $deviceid to show up in the device list of the running VM.
#
# Checks up to six times, sleeping one second after each unsuccessful check. Returns 1 as soon
# as the device is listed; dies if it never appears.
sub qemu_deviceaddverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if defined($present->{$deviceid});
        sleep 1;
    }

    die "error on hotplug device '$deviceid'\n";
}
4463
4464
# Wait for $deviceid to disappear from the device list of the running VM.
#
# Checks up to six times, sleeping one second after each unsuccessful check. Returns 1 as soon
# as the device is no longer listed; dies if it is still there afterwards.
sub qemu_devicedelverify {
    my ($vmid, $deviceid) = @_;

    # need to verify that the device is correctly removed as device_del
    # is async and empty return is not reliable
    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if !defined($present->{$deviceid});
        sleep 1;
    }

    die "error on hot-unplugging device '$deviceid'\n";
}
4479
# Make sure the SCSI controller that $device belongs to is present in the running VM,
# hotplugging it if it is not listed yet. Returns 1.
sub qemu_findorcreatescsihw {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my (undef, $controller, $controller_prefix) = scsihw_infos($conf, $device);
    my $scsihwid = "$controller_prefix$controller";

    my $present = vm_devices_list($vmid);
    vm_deviceplug($storecfg, $conf, $vmid, $scsihwid, $device, $arch, $machine_type)
        if !defined($present->{$scsihwid});

    return 1;
}
4494
# Unplug the SCSI controller of the drive $opt once it is no longer needed. Returns 1.
#
# With 'virtio-scsi-single' the per-disk controller 'virtioscsi<index>' is always unplugged.
# Otherwise 'scsihw<controller>' is only unplugged when no plugged SCSI drive has an index
# below the computed limit.
sub qemu_deletescsihw {
    my ($conf, $vmid, $opt) = @_;

    my $device = parse_drive($opt, $conf->{$opt});

    my $single_controller = $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
    if ($single_controller) {
        vm_deviceunplug($vmid, $conf, "virtioscsi$device->{index}");
        return 1;
    }

    my ($maxdev, $controller) = scsihw_infos($conf, $device);
    my $index_limit = ($maxdev - 1) * ($controller + 1);

    # keep the controller if a plugged SCSI drive still falls into its index range
    my $present = vm_devices_list($vmid);
    for my $name (keys %{$present}) {
        next if !is_valid_drivename($name);

        my $drive = parse_drive($name, $conf->{$name});
        return 1 if $drive->{interface} eq 'scsi' && $drive->{index} < $index_limit;
    }

    vm_deviceunplug($vmid, $conf, "scsihw$controller");

    return 1;
}
4523
# Hotplug the PCI bridge that the device id $device sits behind, if there is one and it is not
# present in the running VM yet. Returns 1.
#
# The bridge is determined by letting print_pci_addr fill the (initially empty) $bridges hash
# for $device; bridge ids below 1 need no hotplug.
sub qemu_add_pci_bridge {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my $bridges = {};

    my $bridgeid;

    # only called for its side effect on $bridges, the returned address is not needed here
    print_pci_addr($device, $bridges, $arch, $machine_type);

    while (my ($k, $v) = each %$bridges) {
        $bridgeid = $k;
    }
    return 1 if !defined($bridgeid) || $bridgeid < 1;

    my $bridge = "pci.$bridgeid";
    my $devices_list = vm_devices_list($vmid);

    if (!defined($devices_list->{$bridge})) {
        # vm_deviceplug takes ($deviceid, $device, $arch, $machine_type); a bridge has no device
        # configuration, so pass an empty hash to keep $arch and $machine_type in their slots
        vm_deviceplug($storecfg, $conf, $vmid, $bridge, {}, $arch, $machine_type);
    }

    return 1;
}
4547
# Set the link state of the network device $device via 'set_link'; a true $up means link up.
sub qemu_set_link_status {
    my ($vmid, $device, $up) = @_;

    # QMP expects a real JSON boolean
    my $state = $up ? JSON::true : JSON::false;

    mon_cmd($vmid, "set_link", name => $device, up => $state);
}
4554
# Add the network backend for $deviceid to the running VM via 'netdev_add'. Returns 1.
#
# The option string from print_netdev_full is split on '=' and ','; 'vhost' is converted to a
# JSON boolean and 'queues' to a number before the command is sent.
sub qemu_netdevadd {
    my ($vmid, $conf, $arch, $device, $deviceid) = @_;

    my $netdev = print_netdev_full($vmid, $conf, $arch, $device, $deviceid, 1);
    my %options = split(/[=,]/, $netdev);

    if (defined($options{vhost})) {
        my $vhost = $options{vhost};
        $options{vhost} = JSON::boolean(PVE::JSONSchema::parse_boolean($vhost));
    }

    if (defined($options{queues})) {
        # force numeric context so the value is not sent as a string
        $options{queues} = $options{queues} + 0;
    }

    mon_cmd($vmid, "netdev_add", %options);

    return 1;
}
4572
# Remove the network backend with id $deviceid from the running VM via 'netdev_del'.
sub qemu_netdevdel {
    my ($vmid, $deviceid) = @_;

    my @args = (id => $deviceid);

    mon_cmd($vmid, "netdev_del", @args);
}
4578
# Replace the USB device $deviceid of the running VM with the one described by $device.
#
# Does nothing if $device is false. Otherwise the old device is unplugged, the 'xhci'
# controller is added when it is not listed yet, and the new device is plugged.
sub qemu_usb_hotplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    return if !$device;

    # remove the old one first
    vm_deviceunplug($vmid, $conf, $deviceid);

    # check if xhci controller is necessary and available
    my $present = vm_devices_list($vmid);
    unless ($present->{xhci}) {
        my $pciaddr = print_pci_addr("xhci", undef, $arch, $machine_type);
        qemu_deviceadd($vmid, PVE::QemuServer::USB::print_qemu_xhci_controller($pciaddr));
    }

    # print_usbdevice_full expects the parsed device
    my $parsed = parse_usb_device($device->{host});
    $parsed->{usb3} = $device->{usb3};

    # add the new one
    vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $parsed, $arch, $machine_type);
}
4602
# Change the number of vCPUs of the running VM to $vcpus.
#
# A false $vcpus means "all", i.e. sockets * cores. Dies if more vCPUs than that are requested,
# if the running VM does not have the configured number of vCPUs before adding, or if
# unplugging is requested on a machine version below 2.7. With machine version 2.7 or higher,
# $conf->{vcpus} is updated and written after every single vCPU that was added or removed.
sub qemu_cpu_hotplug {
    my ($vmid, $conf, $vcpus) = @_;

    my $machine_type = PVE::QemuServer::Machine::get_current_qemu_machine($vmid);

    # 'smp' is the old style socket count and no longer used, 'sockets' takes precedence
    my $sockets = $conf->{sockets} || $conf->{smp} || 1;
    my $cores = $conf->{cores} || 1;
    my $maxcpus = $sockets * $cores;

    $vcpus ||= $maxcpus;

    die "you can't add more vcpus than maxcpus\n" if $vcpus > $maxcpus;

    my $currentvcpus = $conf->{vcpus} || $maxcpus;

    # poll 'query-cpus-fast' once per second until the VM reports $expected vCPUs and return
    # that count; raises a parameter exception with $error once $max_retries is exceeded
    my $wait_for_vcpu_count = sub {
        my ($expected, $max_retries, $error) = @_;

        my $retry = 0;
        while (1) {
            my $running = mon_cmd($vmid, "query-cpus-fast");
            return scalar(@{$running}) if scalar(@{$running}) == $expected;
            raise_param_exc({ vcpus => $error }) if $retry > $max_retries;
            $retry++;
            sleep 1;
        }
    };

    if ($vcpus < $currentvcpus) {
        die "cpu hot-unplugging requires qemu version 2.7 or higher\n"
            if !PVE::QemuServer::Machine::machine_version($machine_type, 2, 7);

        for (my $i = $currentvcpus; $i > $vcpus; $i--) {
            qemu_devicedel($vmid, "cpu$i");

            # update conf after each successful cpu unplug
            $conf->{vcpus} = $wait_for_vcpu_count->($i - 1, 5, "error unplugging cpu$i");
            PVE::QemuConfig->write_config($vmid, $conf);
        }

        return;
    }

    my $running_vcpus = mon_cmd($vmid, "query-cpus-fast");
    die "vcpus in running vm does not match its configuration\n"
        if scalar(@{$running_vcpus}) != $currentvcpus;

    if (PVE::QemuServer::Machine::machine_version($machine_type, 2, 7)) {
        for (my $i = $currentvcpus + 1; $i <= $vcpus; $i++) {
            my $cpustr = print_cpu_device($conf, $i);
            qemu_deviceadd($vmid, $cpustr);

            # update conf after each successful cpu hotplug
            $conf->{vcpus} = $wait_for_vcpu_count->($i, 10, "error hotplugging cpu$i");
            PVE::QemuConfig->write_config($vmid, $conf);
        }
    } else {
        # older machine versions use the 'cpu-add' command with an index instead
        for (my $i = $currentvcpus; $i < $vcpus; $i++) {
            mon_cmd($vmid, "cpu-add", id => int($i));
        }
    }
}
4677
# Apply I/O throttling limits to the block device $deviceid of a running VM.
#
# After $vmid and $deviceid, the limits are passed positionally in this order:
#   bps, bps_rd, bps_wr, iops, iops_rd, iops_wr,
#   bps_max, bps_rd_max, bps_wr_max, iops_max, iops_rd_max, iops_wr_max,
#   bps_max_length, bps_rd_max_length, bps_wr_max_length,
#   iops_max_length, iops_rd_max_length, iops_wr_max_length
# Every limit is converted to an integer. Does nothing if the VM is not running.
sub qemu_block_set_io_throttle {
    my ($vmid, $deviceid, @limits) = @_;

    return if !check_running($vmid);

    # names of the positional limits, in calling order
    my @limit_names = qw(
        bps bps_rd bps_wr iops iops_rd iops_wr
        bps_max bps_rd_max bps_wr_max iops_max iops_rd_max iops_wr_max
        bps_max_length bps_rd_max_length bps_wr_max_length
        iops_max_length iops_rd_max_length iops_wr_max_length
    );

    my %throttle = map { ($limit_names[$_] => int($limits[$_])) } 0 .. $#limit_names;

    mon_cmd($vmid, "block_set_io_throttle", device => $deviceid, %throttle);
}
4709
# Resize the volume $volid and, if the VM is running, tell QEMU about the new size.
#
# The storage layer is always asked to resize; if it returns false, a size of 0 is passed on
# to QEMU. The size sent with 'block_resize' is rounded up to a multiple of 1024.
sub qemu_block_resize {
    my ($vmid, $deviceid, $storecfg, $volid, $size) = @_;

    my $running = check_running($vmid);

    my $resized = PVE::Storage::volume_resize($storecfg, $volid, $size, $running);
    $size = 0 unless $resized;

    return if !$running;

    # round up to the next multiple of 1024
    $size += (1024 - $size % 1024) % 1024;

    my @args = (device => $deviceid, size => int($size), timeout => 60);

    mon_cmd($vmid, "block_resize", @args);
}
4730
# Create the snapshot $snap of the volume $volid.
#
# QEMU takes the snapshot ('blockdev-snapshot-internal-sync') when the VM is running and
# do_snapshots_with_qemu agrees for this volume; otherwise the storage layer does it.
sub qemu_volume_snapshot {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $running = check_running($vmid);
    my $via_qemu = $running && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    return mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap)
        if $via_qemu;

    return PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
}
4742
# Delete the snapshot $snap of the volume $volid.
#
# The volume only counts as in use when the VM is running and the volume is referenced by a
# drive of the current configuration. In that case, and if do_snapshots_with_qemu agrees, QEMU
# deletes the snapshot; otherwise the storage layer does, and is told whether the volume is in
# use.
sub qemu_volume_snapshot_delete {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $in_use = check_running($vmid);

    if ($in_use) {
        # running VM: only true again if the current config actually references the volume
        $in_use = undef;

        my $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->foreach_volume($conf, sub {
            my (undef, $drive) = @_;
            $in_use = 1 if $drive->{file} eq $volid;
        });
    }

    my $via_qemu = $in_use && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    return mon_cmd(
        $vmid, 'blockdev-snapshot-delete-internal-sync', device => $deviceid, name => $snap)
        if $via_qemu;

    return PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $in_use);
}
4764
# Set the migration capabilities of the running VM.
#
# Every capability reported by 'query-migrate-capabilities' is set explicitly: 'auto-converge'
# and 'xbzrle' are switched on, 'dirty-bitmaps' is switched on when 'query-proxmox-support'
# reports the matching property ('pbs-dirty-bitmap-savevm' if $savevm is true,
# 'pbs-dirty-bitmap-migration' otherwise), and everything else is switched off.
sub set_migration_caps {
    my ($vmid, $savevm) = @_;

    # a failing query is tolerated and treated like "not supported"
    my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };

    my $bitmap_prop = $savevm ? 'pbs-dirty-bitmap-savevm' : 'pbs-dirty-bitmap-migration';
    my $dirty_bitmaps = $qemu_support->{$bitmap_prop} ? 1 : 0;

    my $enabled_cap = {
        "auto-converge" => 1,
        "xbzrle" => 1,
        "x-rdma-pin-all" => 0,
        "zero-blocks" => 0,
        "compress" => 0,
        "dirty-bitmaps" => $dirty_bitmaps,
    };

    my $supported_capabilities = mon_cmd($vmid, "query-migrate-capabilities");

    my @capabilities = map {
        my $name = $_->{capability};
        +{
            capability => $name,
            state => $enabled_cap->{$name} ? JSON::true : JSON::false,
        };
    } @$supported_capabilities;

    mon_cmd($vmid, "migrate-set-capabilities", capabilities => \@capabilities);
}
4795
# Call $func once for every volume referenced by $conf or by one of its snapshots.
#
# $func is called as $func->($volid, $attr, @param), where $attr aggregates over all
# references to the volume:
#   cdrom                  - 1 only if every reference is a cdrom drive
#   replicate              - 1 if any reference has replication enabled (the default)
#   shared                 - 1 if any reference is marked shared
#   referenced_in_config   - 1 if the current config references the volume
#   referenced_in_snapshot - hash of the snapshot names referencing the volume
#   size                   - first non-zero size seen
#   is_vmstate, is_tpmstate, is_unused - 1 if referenced through such a key
#   drivename              - key of a valid drive referencing the volume
sub foreach_volid {
    my ($conf, $func, @param) = @_;

    my $volhash = {};

    my $collect = sub {
        my ($key, $drive, $snapname) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $attr = $volhash->{$volid} //= {};

        $attr->{cdrom} //= 1;
        $attr->{cdrom} = 0 if !drive_is_cdrom($drive);

        $attr->{replicate} //= 0;
        $attr->{replicate} = 1 if $drive->{replicate} // 1;

        $attr->{shared} //= 0;
        $attr->{shared} = 1 if $drive->{shared};

        # a missing snapshot name means the reference comes from the current config
        $attr->{referenced_in_config} //= 0;
        if (defined($snapname)) {
            $attr->{referenced_in_snapshot}->{$snapname} = 1;
        } else {
            $attr->{referenced_in_config} = 1;
        }

        my $size = $drive->{size};
        $attr->{size} //= $size if $size;

        $attr->{is_vmstate} //= 0;
        $attr->{is_vmstate} = 1 if $key eq 'vmstate';

        $attr->{is_tpmstate} //= 0;
        $attr->{is_tpmstate} = 1 if $key eq 'tpmstate0';

        $attr->{is_unused} //= 0;
        $attr->{is_unused} = 1 if $key =~ /^unused\d+$/;

        $attr->{drivename} = $key if is_valid_drivename($key);
    };

    my $include_opts = {
        extra_keys => ['vmstate'],
        include_unused => 1,
    };

    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $collect);

    my $snapshots = $conf->{snapshots};
    for my $snapname (keys %{$snapshots}) {
        PVE::QemuConfig->foreach_volume_full(
            $snapshots->{$snapname}, $include_opts, $collect, $snapname);
    }

    for my $volid (keys %$volhash) {
        $func->($volid, $volhash->{$volid}, @param);
    }
}
4853
# Options that vmconfig_hotplug_pending copies from the pending section straight into the
# config; all cloud-init options are included as well.
my $fast_plug_option = {
    map { ($_ => 1) } qw(
        lock
        name
        onboot
        shares
        startup
        description
        protection
        vmstatestorage
        hookscript
        tags
    )
};

$fast_plug_option->{$_} = 1 for keys %$confdesc_cloudinit;
4870
4871 # hotplug changes in [PENDING]
4872 # $selection hash can be used to only apply specified options, for
4873 # example: { cores => 1 } (only apply changed 'cores')
4874 # $errors ref is used to return error messages
4875 sub vmconfig_hotplug_pending {
4876 my ($vmid, $conf, $storecfg, $selection, $errors) = @_;
4877
4878 my $defaults = load_defaults();
4879 my $arch = get_vm_arch($conf);
4880 my $machine_type = get_vm_machine($conf, undef, $arch);
4881
4882 # commit values which do not have any impact on running VM first
4883 # Note: those option cannot raise errors, we we do not care about
4884 # $selection and always apply them.
4885
4886 my $add_error = sub {
4887 my ($opt, $msg) = @_;
4888 $errors->{$opt} = "hotplug problem - $msg";
4889 };
4890
4891 my $changes = 0;
4892 foreach my $opt (keys %{$conf->{pending}}) { # add/change
4893 if ($fast_plug_option->{$opt}) {
4894 $conf->{$opt} = $conf->{pending}->{$opt};
4895 delete $conf->{pending}->{$opt};
4896 $changes = 1;
4897 }
4898 }
4899
4900 if ($changes) {
4901 PVE::QemuConfig->write_config($vmid, $conf);
4902 }
4903
4904 my $ostype = $conf->{ostype};
4905 my $version = extract_version($machine_type, get_running_qemu_version($vmid));
4906 my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
4907 my $usb_hotplug = $hotplug_features->{usb}
4908 && min_version($version, 7, 1)
4909 && defined($ostype) && ($ostype eq 'l26' || windows_version($ostype) > 7);
4910
4911 my $cgroup = PVE::QemuServer::CGroup->new($vmid);
4912 my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
4913 foreach my $opt (sort keys %$pending_delete_hash) {
4914 next if $selection && !$selection->{$opt};
4915 my $force = $pending_delete_hash->{$opt}->{force};
4916 eval {
4917 if ($opt eq 'hotplug') {
4918 die "skip\n" if ($conf->{hotplug} =~ /memory/);
4919 } elsif ($opt eq 'tablet') {
4920 die "skip\n" if !$hotplug_features->{usb};
4921 if ($defaults->{tablet}) {
4922 vm_deviceplug($storecfg, $conf, $vmid, 'tablet', $arch, $machine_type);
4923 vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', $arch, $machine_type)
4924 if $arch eq 'aarch64';
4925 } else {
4926 vm_deviceunplug($vmid, $conf, 'tablet');
4927 vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
4928 }
4929 } elsif ($opt =~ m/^usb(\d+)$/) {
4930 my $index = $1;
4931 die "skip\n" if !$usb_hotplug;
4932 vm_deviceunplug($vmid, $conf, "usbredirdev$index"); # if it's a spice port
4933 vm_deviceunplug($vmid, $conf, $opt);
4934 } elsif ($opt eq 'vcpus') {
4935 die "skip\n" if !$hotplug_features->{cpu};
4936 qemu_cpu_hotplug($vmid, $conf, undef);
4937 } elsif ($opt eq 'balloon') {
4938 # enable balloon device is not hotpluggable
4939 die "skip\n" if defined($conf->{balloon}) && $conf->{balloon} == 0;
4940 # here we reset the ballooning value to memory
4941 my $balloon = $conf->{memory} || $defaults->{memory};
4942 mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
4943 } elsif ($fast_plug_option->{$opt}) {
4944 # do nothing
4945 } elsif ($opt =~ m/^net(\d+)$/) {
4946 die "skip\n" if !$hotplug_features->{network};
4947 vm_deviceunplug($vmid, $conf, $opt);
4948 } elsif (is_valid_drivename($opt)) {
4949 die "skip\n" if !$hotplug_features->{disk} || $opt =~ m/(ide|sata)(\d+)/;
4950 vm_deviceunplug($vmid, $conf, $opt);
4951 vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
4952 } elsif ($opt =~ m/^memory$/) {
4953 die "skip\n" if !$hotplug_features->{memory};
4954 PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt);
4955 } elsif ($opt eq 'cpuunits') {
4956 $cgroup->change_cpu_shares(undef);
4957 } elsif ($opt eq 'cpulimit') {
4958 $cgroup->change_cpu_quota(undef, undef); # reset, cgroup module can better decide values
4959 } else {
4960 die "skip\n";
4961 }
4962 };
4963 if (my $err = $@) {
4964 &$add_error($opt, $err) if $err ne "skip\n";
4965 } else {
4966 delete $conf->{$opt};
4967 PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
4968 }
4969 }
4970
4971 foreach my $opt (keys %{$conf->{pending}}) {
4972 next if $selection && !$selection->{$opt};
4973 my $value = $conf->{pending}->{$opt};
4974 eval {
4975 if ($opt eq 'hotplug') {
4976 die "skip\n" if ($value =~ /memory/) || ($value !~ /memory/ && $conf->{hotplug} =~ /memory/);
4977 } elsif ($opt eq 'tablet') {
4978 die "skip\n" if !$hotplug_features->{usb};
4979 if ($value == 1) {
4980 vm_deviceplug($storecfg, $conf, $vmid, 'tablet', $arch, $machine_type);
4981 vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', $arch, $machine_type)
4982 if $arch eq 'aarch64';
4983 } elsif ($value == 0) {
4984 vm_deviceunplug($vmid, $conf, 'tablet');
4985 vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
4986 }
4987 } elsif ($opt =~ m/^usb(\d+)$/) {
4988 my $index = $1;
4989 die "skip\n" if !$usb_hotplug;
4990 my $d = eval { parse_property_string($usbdesc->{format}, $value) };
4991 my $id = $opt;
4992 if ($d->{host} eq 'spice') {
4993 $id = "usbredirdev$index";
4994 }
4995 qemu_usb_hotplug($storecfg, $conf, $vmid, $id, $d, $arch, $machine_type);
4996 } elsif ($opt eq 'vcpus') {
4997 die "skip\n" if !$hotplug_features->{cpu};
4998 qemu_cpu_hotplug($vmid, $conf, $value);
4999 } elsif ($opt eq 'balloon') {
5000 # enable/disable balloning device is not hotpluggable
5001 my $old_balloon_enabled = !!(!defined($conf->{balloon}) || $conf->{balloon});
5002 my $new_balloon_enabled = !!(!defined($conf->{pending}->{balloon}) || $conf->{pending}->{balloon});
5003 die "skip\n" if $old_balloon_enabled != $new_balloon_enabled;
5004
5005 # allow manual ballooning if shares is set to zero
5006 if ((defined($conf->{shares}) && ($conf->{shares} == 0))) {
5007 my $balloon = $conf->{pending}->{balloon} || $conf->{memory} || $defaults->{memory};
5008 mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
5009 }
5010 } elsif ($opt =~ m/^net(\d+)$/) {
5011 # some changes can be done without hotplug
5012 vmconfig_update_net($storecfg, $conf, $hotplug_features->{network},
5013 $vmid, $opt, $value, $arch, $machine_type);
5014 } elsif (is_valid_drivename($opt)) {
5015 die "skip\n" if $opt eq 'efidisk0' || $opt eq 'tpmstate0';
5016 # some changes can be done without hotplug
5017 my $drive = parse_drive($opt, $value);
5018 if (drive_is_cloudinit($drive)) {
5019 PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid);
5020 }
5021 vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
5022 $vmid, $opt, $value, $arch, $machine_type);
5023 } elsif ($opt =~ m/^memory$/) { #dimms
5024 die "skip\n" if !$hotplug_features->{memory};
5025 $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt, $value);
5026 } elsif ($opt eq 'cpuunits') {
5027 my $new_cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{pending}->{$opt}); #clamp
5028 $cgroup->change_cpu_shares($new_cpuunits);
5029 } elsif ($opt eq 'cpulimit') {
5030 my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000);
5031 $cgroup->change_cpu_quota($cpulimit, 100000);
5032 } elsif ($opt eq 'agent') {
5033 vmconfig_update_agent($conf, $opt, $value);
5034 } else {
5035 die "skip\n"; # skip non-hot-pluggable options
5036 }
5037 };
5038 if (my $err = $@) {
5039 &$add_error($opt, $err) if $err ne "skip\n";
5040 } else {
5041 $conf->{$opt} = $value;
5042 delete $conf->{pending}->{$opt};
5043 }
5044 }
5045
5046 # unplug xhci controller if no usb device is left
5047 if ($usb_hotplug) {
5048 my $has_usb = 0;
5049 for (my $i = 0; $i < $MAX_USB_DEVICES; $i++) {
5050 next if !defined($conf->{"usb$i"});
5051 $has_usb = 1;
5052 last;
5053 }
5054 if (!$has_usb) {
5055 vm_deviceunplug($vmid, $conf, 'xhci');
5056 }
5057 }
5058
5059 PVE::QemuConfig->write_config($vmid, $conf);
5060
5061 if($hotplug_features->{cloudinit}) {
5062 my $pending = PVE::QemuServer::Cloudinit::get_pending_config($conf, $vmid);
5063 my $regenerate = undef;
5064 for my $item (@$pending) {
5065 $regenerate = 1 if defined($item->{delete}) or defined($item->{pending});
5066 }
5067 PVE::QemuServer::vmconfig_update_cloudinit_drive($storecfg, $conf, $vmid) if $regenerate;
5068 }
5069 }
5070
# Free the volume behind a drive entry if this is a forced removal or an 'unused' entry.
#
# Returns 1 when the entry may be dropped from the config (the volume was freed, or the VM
# does not own it), and nothing when the drive is not a candidate for deallocation at all.
# Dies if the permission check fails or the volume is still referenced.
sub try_deallocate_drive {
    my ($storecfg, $vmid, $conf, $key, $drive, $rpcenv, $authuser, $force) = @_;

    # only forced removals and 'unused' entries are candidates
    return if !$force && $key !~ /^unused/;
    return if drive_is_cdrom($drive, 1);

    my $volume = $drive->{file};

    # if the VM is not the owner of this disk, it is only removed from the config
    return 1 if !vm_is_volid_owner($storecfg, $vmid, $volume);

    # scalar context on purpose, only the storage ID is wanted here
    my $storage_id = PVE::Storage::parse_volume_id($volume);
    $rpcenv->check($authuser, "/storage/$storage_id", ['Datastore.AllocateSpace']);

    # make sure the disk is really unused before freeing it
    if (PVE::QemuServer::Drive::is_volume_in_use($storecfg, $conf, $key, $volume)) {
        die "unable to delete '$volume' - volume is still in use (snapshot?)\n";
    }
    PVE::Storage::vdisk_free($storecfg, $volume);

    return 1;
}
5093
# Handle removal of drive option $opt from the config: without $force the drive is
# registered as unused, with $force the caller's 'VM.Config.Disk' permission is checked
# and the volume is deallocated via try_deallocate_drive().
sub vmconfig_delete_or_detach_drive {
    my ($vmid, $storecfg, $conf, $opt, $force) = @_;

    my $parsed_drive = parse_drive($opt, $conf->{$opt});

    my $rpc = PVE::RPCEnvironment::get();
    my $user = $rpc->get_user();

    # plain detach: keep the volume around as an 'unused' entry
    return vmconfig_register_unused_drive($storecfg, $vmid, $conf, $parsed_drive) if !$force;

    $rpc->check_vm_perm($user, $vmid, undef, ['VM.Config.Disk']);
    return try_deallocate_drive($storecfg, $vmid, $conf, $opt, $parsed_drive, $rpc, $user, $force);
}
5109
5110
5111
# Apply all pending changes of a VM config in one go ("cold plug").
#
# Pending deletions are processed first, then pending additions/changes. A failure for a
# single option is recorded in $errors (keyed by option name) and warned about; the
# remaining options are still processed. The config is written once at the end, and the
# cloud-init config is regenerated if a cloud-init drive was added or changed.
sub vmconfig_apply_pending {
    my ($vmid, $conf, $storecfg, $errors) = @_;

    return if !keys %{$conf->{pending}};

    my $record_failure = sub {
        my ($opt, $msg) = @_;
        my $text = "unable to apply pending change $opt : $msg";
        $errors->{$opt} = $text;
        warn $text;
    };

    # cold plug

    my $to_delete = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    for my $opt (sort keys %$to_delete) {
        my $force = $to_delete->{$opt}->{force};
        eval {
            die "internal error" if $opt =~ m/^unused/;
            vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force)
                if defined($conf->{$opt}) && is_valid_drivename($opt);
        };
        if (my $err = $@) {
            $record_failure->($opt, $err);
            next;
        }
        PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        delete $conf->{$opt};
    }

    PVE::QemuConfig->cleanup_pending($conf);

    my $regenerate_cloudinit;

    for my $opt (keys %{$conf->{pending}}) { # add/change
        next if $opt eq 'delete'; # just to be sure
        eval {
            # a drive that gets replaced is kept as an 'unused' entry
            if (defined(my $current = $conf->{$opt})) {
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, parse_drive($opt, $current))
                    if is_valid_drivename($opt);
            }
        };
        if (my $err = $@) {
            $record_failure->($opt, $err);
            next;
        }

        if (is_valid_drivename($opt)) {
            my $new_drive = parse_drive($opt, $conf->{pending}->{$opt});
            $regenerate_cloudinit = 1 if drive_is_cloudinit($new_drive);
        }

        $conf->{$opt} = delete $conf->{pending}->{$opt};
    }

    # write all changes at once to avoid unnecessary i/o
    PVE::QemuConfig->write_config($vmid, $conf);
    PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid) if $regenerate_cloudinit;
}
5172
# Apply a pending change of network device $opt (new property string $value) to a running VM.
#
# If only bridge, tag, trunks, firewall, rate or link state differ from the current
# config, the change is applied online on the existing tap device and 1 is returned.
# A change of model, MAC address, queues or bridge/NAT mode requires hot-unplugging the
# old device and plugging the new one; a device that does not exist yet is hot-plugged.
# Dies with "skip\n" when hotplug would be needed but $hotplug is not set.
sub vmconfig_update_net {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $newnet = parse_net($value);

    if ($conf->{$opt}) {
        my $oldnet = parse_net($conf->{$opt});

        my $needs_replug =
            safe_string_ne($oldnet->{model}, $newnet->{model})
            || safe_string_ne($oldnet->{macaddr}, $newnet->{macaddr})
            || safe_num_ne($oldnet->{queues}, $newnet->{queues})
            || !($newnet->{bridge} && $oldnet->{bridge}); # bridge/nat mode change

        if (!$needs_replug) {
            my ($index) = $opt =~ m/net(\d+)/ or die "internal error";
            my $iface = "tap${vmid}i$index";

            my $bridge_settings_changed =
                safe_string_ne($oldnet->{bridge}, $newnet->{bridge})
                || safe_num_ne($oldnet->{tag}, $newnet->{tag})
                || safe_string_ne($oldnet->{trunks}, $newnet->{trunks})
                || safe_num_ne($oldnet->{firewall}, $newnet->{firewall});

            if ($bridge_settings_changed) {
                PVE::Network::tap_unplug($iface);

                my @plug_args = ($iface, @$newnet{qw(bridge tag firewall trunks rate)});
                if ($have_sdn) {
                    PVE::Network::SDN::Zones::tap_plug(@plug_args);
                } else {
                    PVE::Network::tap_plug(@plug_args);
                }
            } elsif (safe_num_ne($oldnet->{rate}, $newnet->{rate})) {
                # Rate can be applied on its own but any change above needs to
                # include the rate in tap_plug since OVS resets everything.
                PVE::Network::tap_rate_limit($iface, $newnet->{rate});
            }

            qemu_set_link_status($vmid, $opt, !$newnet->{link_down})
                if safe_string_ne($oldnet->{link_down}, $newnet->{link_down});

            return 1;
        }

        # for non online change, we try to hot-unplug
        die "skip\n" if !$hotplug;
        vm_deviceunplug($vmid, $conf, $opt);
    }

    die "skip\n" if !$hotplug;
    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet, $arch, $machine_type);
}
5225
# Decide whether a pending change of the guest agent option can be applied to a running VM.
#
# Dies with "skip\n" if the option is not set in the current config, or if any
# sub-option other than the hotpluggable ones (fstrim_cloned_disks) differs between
# the current and the new value. Returns nothing when the change can simply be taken over.
sub vmconfig_update_agent {
    my ($conf, $opt, $value) = @_;

    die "skip\n" if !$conf->{$opt};

    my %hotpluggable = (fstrim_cloned_disks => 1);

    my $current = parse_guest_agent($conf);
    my $wanted = parse_guest_agent({ $opt => $value });

    # first pass catches added/changed options, second pass catches removed ones
    for my $direction ([$wanted, $current], [$current, $wanted]) {
        my ($from, $against) = @$direction;
        for my $name (keys %$from) {
            next if defined($hotpluggable{$name});
            die "skip\n" if safe_string_ne($from->{$name}, $against->{$name});
        }
    }

    return; # either no actual change (e.g., format string reordered) or just hotpluggable changes
}
5248
# Apply a pending change of drive $opt (new property string $value) to a running VM.
#
# - CD-ROM drives: the medium is ejected and, if the new file is not 'none', the new
#   medium is inserted; returns 1.
# - Same volume, changed options: dies with "skip\n" if a non-hotpluggable option
#   (discard, iothread, queues, cache, ssd) changed, otherwise updates the I/O throttle
#   settings if any of them changed; returns 1.
# - Different volume: the old disk is unplugged and registered as unused, then the new
#   disk is hot-plugged. New disks are hot-plugged as well.
# Dies with "skip\n" when hotplug is required but not enabled, or for ide/sata drives.
sub vmconfig_update_disk {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $drive = parse_drive($opt, $value);

    if ($conf->{$opt} && (my $old_drive = parse_drive($opt, $conf->{$opt}))) {
        my $new_media = $drive->{media} || 'disk';
        my $old_media = $old_drive->{media} || 'disk';
        die "unable to change media type\n" if $new_media ne $old_media;

        if (drive_is_cdrom($old_drive)) {
            if ($drive->{file} eq 'none') {
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive)
                    if drive_is_cloudinit($old_drive);
            } else {
                # resolve the new medium first, so a failure leaves the old one inserted
                my $iso_path = get_iso_path($storecfg, $vmid, $drive->{file});

                # force eject if locked
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");

                mon_cmd($vmid, "blockdev-change-medium", id => "$opt", filename => "$iso_path")
                    if $iso_path;
            }

            return 1;
        }

        if ($drive->{file} eq $old_drive->{file}) {
            # update existing disk

            # skip non hotpluggable value
            for my $prop (qw(discard iothread queues cache ssd)) {
                die "skip\n" if safe_string_ne($drive->{$prop}, $old_drive->{$prop});
            }

            # throttle properties, grouped in the order qemu_block_set_io_throttle takes them
            my @bandwidth = qw(mbps mbps_rd mbps_wr);
            my @iops = qw(iops iops_rd iops_wr);
            my @bandwidth_max = qw(mbps_max mbps_rd_max mbps_wr_max);
            my @iops_max = qw(iops_max iops_rd_max iops_wr_max);
            my @burst_length = qw(
                bps_max_length bps_rd_max_length bps_wr_max_length
                iops_max_length iops_rd_max_length iops_wr_max_length
            );

            my $throttle_changed = 0;
            for my $prop (@bandwidth, @iops, @bandwidth_max, @iops_max, @burst_length) {
                next if !safe_num_ne($drive->{$prop}, $old_drive->{$prop});
                $throttle_changed = 1;
                last;
            }

            # apply throttle
            if ($throttle_changed) {
                my @limits;
                push @limits, ($drive->{$_} || 0) * 1024 * 1024 for @bandwidth;
                push @limits, $drive->{$_} || 0 for @iops;
                push @limits, ($drive->{$_} || 0) * 1024 * 1024 for @bandwidth_max;
                push @limits, $drive->{$_} || 0 for @iops_max;
                push @limits, $drive->{$_} || 1 for @burst_length;

                qemu_block_set_io_throttle($vmid, "drive-$opt", @limits);
            }

            return 1;
        }

        # the volume itself changed: unplug and register as unused
        die "skip\n" if !$hotplug;

        vm_deviceunplug($vmid, $conf, $opt);
        vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
    }

    die "skip\n" if !$hotplug || $opt =~ m/(ide|sata)(\d+)/;

    # hotplug new disks
    PVE::Storage::activate_volumes($storecfg, [$drive->{file}]) if $drive->{file} !~ m|^/dev/.+|;
    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive, $arch, $machine_type);
}
5356
# Regenerate the cloud-init config of a VM and, if the VM is running, swap the
# regenerated image into the cloud-init drive (eject + blockdev-change-medium).
# Does nothing if the config contains no cloud-init drive.
sub vmconfig_update_cloudinit_drive {
    my ($storecfg, $conf, $vmid) = @_;

    my ($ci_key, $ci_drive);

    # if several cloud-init drives exist, the last one visited wins
    my $remember_cloudinit = sub {
        my ($key, $drive) = @_;
        return if !PVE::QemuServer::drive_is_cloudinit($drive);
        ($ci_key, $ci_drive) = ($key, $drive);
    };
    PVE::QemuConfig->foreach_volume($conf, $remember_cloudinit);

    return if !$ci_drive;

    PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid);

    if (PVE::QemuServer::check_running($vmid)) {
        my $image_path = PVE::Storage::path($storecfg, $ci_drive->{file});
        if ($image_path) {
            mon_cmd($vmid, "eject", force => JSON::true, id => "$ci_key");
            mon_cmd($vmid, "blockdev-change-medium", id => "$ci_key", filename => "$image_path");
        }
    }
}
5384
# called in locked context by incoming migration
#
# Collect the volumes of $conf that live on non-shared storage. CD-ROM drives,
# 'tpmstate0' and drives without a volume are left out.
#
# Returns a hash reference mapping the drive key to
#   [$volid, $storeid, $volname, $drive, $use_existing]
# where $use_existing is 1 if the volume is listed in $replicated_volumes.
sub vm_migrate_get_nbd_disks {
    my ($storecfg, $conf, $replicated_volumes) = @_;

    my %nbd_candidates;

    my $collect = sub {
        my ($key, $drive) = @_;

        return if drive_is_cdrom($drive) || $key eq 'tpmstate0';

        my $volid = $drive->{file} or return;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid);

        my $storage = PVE::Storage::storage_config($storecfg, $storeid);
        return if $storage->{shared};

        # replicated disks re-use existing state via bitmap
        my $use_existing = $replicated_volumes->{$volid} ? 1 : 0;
        $nbd_candidates{$key} = [$volid, $storeid, $volname, $drive, $use_existing];
    };
    PVE::QemuConfig->foreach_volume($conf, $collect);

    return \%nbd_candidates;
}
5411
# called in locked context by incoming migration
#
# Allocate the target volumes for the disks that are migrated via NBD.
#
# $source_volumes maps a drive key to
#   [$volid, $storeid, $volname, $drive, $use_existing, $format]
# ($format is optional). Entries with $use_existing set are not allocated again, their
# existing volume is re-used and marked as replicated.
#
# Returns a hash reference mapping each drive key to { drivestr, volid } plus
# 'replicated => 1' for re-used volumes.
sub vm_migrate_alloc_nbd_disks {
    my ($storecfg, $vmid, $source_volumes, $storagemap) = @_;

    my $nbd = {};
    foreach my $opt (sort keys %$source_volumes) {
        my ($volid, $storeid, $volname, $drive, $use_existing, $format) = @{$source_volumes->{$opt}};

        if ($use_existing) {
            $nbd->{$opt}->{drivestr} = print_drive($drive);
            $nbd->{$opt}->{volid} = $volid;
            $nbd->{$opt}->{replicated} = 1;
            next;
        }

        # storage mapping + volname = regular migration
        # storage mapping + format = remote migration
        # order of precedence, filtered by whether storage supports it:
        # 1. explicit requested format
        # 2. format of current volume
        # 3. default format of storage
        if (!$storagemap->{identity}) {
            $storeid = PVE::JSONSchema::map_id($storagemap, $storeid);
            my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
            if (!$format || !grep { $format eq $_ } @$validFormats) {
                # Drop a requested format the target storage does not support. Without
                # this, the '//=' below could never replace it and the unsupported
                # format would be passed on to vdisk_alloc.
                $format = undef;

                if ($volname) {
                    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
                    my $fileFormat = qemu_img_format($scfg, $volname);
                    $format = $fileFormat
                        if grep { $fileFormat eq $_ } @$validFormats;
                }
                $format //= $defFormat;
            }
        } else {
            # can't happen for remote migration, so $volname is always defined
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $format = qemu_img_format($scfg, $volname);
        }

        my $size = $drive->{size} / 1024;
        my $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $format, undef, $size);

        # NOTE: $newdrive is the same hash as $drive, so the entry in $source_volumes is
        # updated in place with the new format and volume ID
        my $newdrive = $drive;
        $newdrive->{format} = $format;
        $newdrive->{file} = $newvolid;
        my $drivestr = print_drive($newdrive);
        $nbd->{$opt}->{drivestr} = $drivestr;
        $nbd->{$opt}->{volid} = $newvolid;
    }

    return $nbd;
}
5463
# Start a VM while holding its config lock.
#
# see vm_start_nolock for parameters, additionally:
# migrate_opts:
#   storagemap = parsed storage map for allocating NBD disks
#
# If the config carries a 'backup' lock and the VM is already running, the guest is
# resumed in the existing QEMU instance instead of being started.
sub vm_start {
    my ($storecfg, $vmid, $params, $migrate_opts) = @_;

    my $start_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid, $migrate_opts->{migratedfrom});

        if (!$params->{skiptemplate} && PVE::QemuConfig->is_template($conf)) {
            die "you can't start a vm if it's a template\n";
        }

        my $suspended = PVE::QemuConfig->has_lock($conf, 'suspended');
        my $backup_locked = PVE::QemuConfig->has_lock($conf, 'backup');

        my $is_running = check_running($vmid, undef, $migrate_opts->{migratedfrom});

        # a backup is currently running, attempt to start the guest in the
        # existing QEMU instance
        return vm_resume($vmid) if $backup_locked && $is_running;

        if (!$params->{skiplock} && !$suspended) {
            PVE::QemuConfig->check_lock($conf);
        }

        $params->{resume} = $suspended || defined($conf->{vmstate});

        die "VM $vmid already running\n" if $is_running;

        if (my $storagemap = $migrate_opts->{storagemap}) {
            my $replicated = $migrate_opts->{replicated_volumes};
            my $local_disks = vm_migrate_get_nbd_disks($storecfg, $conf, $replicated);
            $migrate_opts->{nbd} = vm_migrate_alloc_nbd_disks($storecfg, $vmid, $local_disks, $storagemap);

            # let the in-memory config point at the freshly allocated volumes
            for my $drive_key (keys %{$migrate_opts->{nbd}}) {
                $conf->{$drive_key} = $migrate_opts->{nbd}->{$drive_key}->{drivestr};
            }
        }

        return vm_start_nolock($storecfg, $vmid, $conf, $params, $migrate_opts);
    };

    return PVE::QemuConfig->lock_config($vmid, $start_locked);
}
5507
5508
# Start the QEMU process for a VM. This sub does not take the config lock itself;
# vm_start() calls it from within lock_config().
#
# Roughly, in this order: apply pending config changes (only without statefile), build
# the QEMU command line, reserve and prepare host PCI devices, activate volumes, run
# QEMU inside a systemd scope, then do the post-start setup (NBD exports, migration
# capabilities, SPICE ticket, ballooning, link state, removal of suspend state).
#
# params:
#   statefile => 'tcp', 'unix' for migration or path/volid for RAM state
#   skiplock => 0/1, skip checking for config lock
#   skiptemplate => 0/1, skip checking whether VM is template
#   forcemachine => to force Qemu machine (rollback/migration)
#   forcecpu => a QEMU '-cpu' argument string to override get_cpu_options
#   timeout => in seconds
#   paused => start VM in paused state (backup)
#   resume => resume from hibernation
#   pbs-backing => {
#       sata0 => {
#           repository
#           snapshot
#           keyfile
#           archive
#       },
#       virtio2 => ...
#   }
# migrate_opts:
#   nbd => volumes for NBD exports (vm_migrate_alloc_nbd_disks)
#   migratedfrom => source node
#   spice_ticket => used for spice migration, passed via tunnel/stdin
#   network => CIDR of migration network
#   type => secure/insecure - tunnel over encrypted connection or plain-text
#   nbd_proto_version => int, 0 for TCP, 1 for UNIX
#   replicated_volumes => which volids should be re-used with bitmaps for nbd migration
#   offline_volumes => new volids of offline migrated disks like tpmstate and cloudinit, not yet
#       contained in config
#
# Returns a hash reference that may contain: migrate_uri, migrate_storage_uri,
# drives (per NBD-exported drive, including nbd_uri) and spice_port.
sub vm_start_nolock {
    my ($storecfg, $vmid, $conf, $params, $migrate_opts) = @_;

    my $statefile = $params->{statefile};
    my $resume = $params->{resume};

    my $migratedfrom = $migrate_opts->{migratedfrom};
    my $migration_type = $migrate_opts->{type};

    my $res = {};

    # clean up leftover reboot request files
    eval { clear_reboot_request($vmid); };
    warn $@ if $@;

    if (!$statefile && scalar(keys %{$conf->{pending}})) {
        vmconfig_apply_pending($vmid, $conf, $storecfg);
        $conf = PVE::QemuConfig->load_config($vmid); # update/reload
    }

    # don't regenerate the ISO if the VM is started as part of a live migration
    # this way we can reuse the old ISO with the correct config
    PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid) if !$migratedfrom;

    # override offline migrated volumes, conf is out of date still
    if (my $offline_volumes = $migrate_opts->{offline_volumes}) {
        for my $key (sort keys $offline_volumes->%*) {
            my $parsed = parse_drive($key, $conf->{$key});
            $parsed->{file} = $offline_volumes->{$key};
            $conf->{$key} = print_drive($parsed);
        }
    }

    my $defaults = load_defaults();

    # set environment variable useful inside network script
    $ENV{PVE_MIGRATED_FROM} = $migratedfrom if $migratedfrom;

    PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1);

    my $forcemachine = $params->{forcemachine};
    my $forcecpu = $params->{forcecpu};
    if ($resume) {
        # enforce machine and CPU type on suspended vm to ensure HW compatibility
        $forcemachine = $conf->{runningmachine};
        $forcecpu = $conf->{runningcpu};
        print "Resuming suspended VM\n";
    }

    my ($cmd, $vollist, $spice_port) = config_to_command($storecfg, $vmid,
        $conf, $defaults, $forcemachine, $forcecpu, $params->{'pbs-backing'});

    # The migration IP is resolved lazily and cached, it is needed by both the 'tcp'
    # statefile handling and the NBD server setup below.
    my $migration_ip;
    my $get_migration_ip = sub {
        my ($nodename) = @_;

        return $migration_ip if defined($migration_ip);

        my $cidr = $migrate_opts->{network};

        if (!defined($cidr)) {
            my $dc_conf = PVE::Cluster::cfs_read_file('datacenter.cfg');
            $cidr = $dc_conf->{migration}->{network};
        }

        if (defined($cidr)) {
            my $ips = PVE::Network::get_local_ip_from_cidr($cidr);

            die "could not get IP: no address configured on local " .
                "node for network '$cidr'\n" if scalar(@$ips) == 0;

            die "could not get IP: multiple addresses configured on local " .
                "node for network '$cidr'\n" if scalar(@$ips) > 1;

            $migration_ip = $ips->[0];
        }

        $migration_ip = PVE::Cluster::remote_node_ip($nodename, 1)
            if !defined($migration_ip);

        return $migration_ip;
    };

    my $migrate_uri;
    if ($statefile) {
        if ($statefile eq 'tcp') {
            my $localip = "localhost";
            my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg');
            my $nodename = nodename();

            if (!defined($migration_type)) {
                if (defined($datacenterconf->{migration}->{type})) {
                    $migration_type = $datacenterconf->{migration}->{type};
                } else {
                    $migration_type = 'secure';
                }
            }

            if ($migration_type eq 'insecure') {
                $localip = $get_migration_ip->($nodename);
                $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
            }

            my $pfamily = PVE::Tools::get_host_address_family($nodename);
            my $migrate_port = PVE::Tools::next_migrate_port($pfamily);
            $migrate_uri = "tcp:${localip}:${migrate_port}";
            push @$cmd, '-incoming', $migrate_uri;
            push @$cmd, '-S';

        } elsif ($statefile eq 'unix') {
            # should be the default for secure migrations, as an SSH TCP forward
            # tunnel is not deterministically ready in time and regularly fails
            # to set up, so use UNIX socket forwards
            my $socket_addr = "/run/qemu-server/$vmid.migrate";
            unlink $socket_addr;

            $migrate_uri = "unix:$socket_addr";

            push @$cmd, '-incoming', $migrate_uri;
            push @$cmd, '-S';

        } elsif (-e $statefile) {
            push @$cmd, '-loadstate', $statefile;
        } else {
            my $statepath = PVE::Storage::path($storecfg, $statefile);
            push @$vollist, $statefile;
            push @$cmd, '-loadstate', $statepath;
        }
    } elsif ($params->{paused}) {
        push @$cmd, '-S';
    }

    my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume);

    my $pci_devices = {}; # host pci devices
    for (my $i = 0; $i < $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES; $i++) {
        my $dev = $conf->{"hostpci$i"} or next;
        $pci_devices->{$i} = parse_hostpci($dev);
    }

    # do not reserve pciid for mediated devices, sysfs will error out for duplicate assignment
    my $real_pci_devices = [ grep { !(defined($_->{mdev}) && scalar($_->{pciid}->@*) == 1) } values $pci_devices->%* ];

    # map to a flat list of pci ids
    my $pci_id_list = [ map { $_->{id} } map { $_->{pciid}->@* } $real_pci_devices->@* ];

    # reserve all PCI IDs before actually doing anything with them
    PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, $start_timeout);

    eval {
        my $uuid;
        for my $id (sort keys %$pci_devices) {
            my $d = $pci_devices->{$id};
            for my $dev ($d->{pciid}->@*) {
                my $info = PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $id, $d->{mdev});

                # nvidia grid needs the uuid of the mdev as qemu parameter
                if ($d->{mdev} && !defined($uuid) && $info->{vendor} eq '10de') {
                    $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $id);
                }
            }
        }
        push @$cmd, '-uuid', $uuid if defined($uuid);
    };
    if (my $err = $@) {
        eval { cleanup_pci_devices($vmid, $conf) };
        warn $@ if $@;
        die $err;
    }

    PVE::Storage::activate_volumes($storecfg, $vollist);

    # best effort: stop a possibly left-over scope of a previous run, errors are ignored
    eval {
        run_command(['/bin/systemctl', 'stop', "$vmid.scope"], outfunc => sub{}, errfunc => sub{});
    };
    # Issues with the above 'stop' not being fully completed are extremely rare, a very low
    # timeout should be more than enough here...
    PVE::Systemd::wait_for_unit_removed("$vmid.scope", 20);

    my $cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{cpuunits});

    my %run_params = (
        timeout => $statefile ? undef : $start_timeout,
        umask => 0077,
        noerr => 1,
    );

    # when migrating, prefix QEMU output so other side can pick up any
    # errors that might occur and show the user
    if ($migratedfrom) {
        $run_params{quiet} = 1;
        $run_params{logfunc} = sub { print "QEMU: $_[0]\n" };
    }

    my %systemd_properties = (
        Slice => 'qemu.slice',
        KillMode => 'process',
        SendSIGKILL => 0,
        TimeoutStopUSec => ULONG_MAX, # infinity
    );

    if (PVE::CGroup::cgroup_mode() == 2) {
        $systemd_properties{CPUWeight} = $cpuunits;
    } else {
        $systemd_properties{CPUShares} = $cpuunits;
    }

    if (my $cpulimit = $conf->{cpulimit}) {
        $systemd_properties{CPUQuota} = int($cpulimit * 100);
    }
    $systemd_properties{timeout} = 10 if $statefile; # setting up the scope should be quick

    my $run_qemu = sub {
        PVE::Tools::run_fork(sub {
            PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);

            my $tpmpid;
            if (my $tpm = $conf->{tpmstate0}) {
                # start the TPM emulator so QEMU can connect on start
                $tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom);
            }

            my $exitcode = run_command($cmd, %run_params);
            if ($exitcode) {
                if ($tpmpid) {
                    warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n";
                    kill 'TERM', $tpmpid;
                }
                die "QEMU exited with code $exitcode\n";
            }
        });
    };

    if ($conf->{hugepages}) {

        my $code = sub {
            my $hugepages_topology = PVE::QemuServer::Memory::hugepages_topology($conf);
            my $hugepages_host_topology = PVE::QemuServer::Memory::hugepages_host_topology();

            PVE::QemuServer::Memory::hugepages_mount();
            PVE::QemuServer::Memory::hugepages_allocate($hugepages_topology, $hugepages_host_topology);

            eval { $run_qemu->() };
            if (my $err = $@) {
                PVE::QemuServer::Memory::hugepages_reset($hugepages_host_topology)
                    if !$conf->{keephugepages};
                die $err;
            }

            PVE::QemuServer::Memory::hugepages_pre_deallocate($hugepages_topology)
                if !$conf->{keephugepages};
        };
        eval { PVE::QemuServer::Memory::hugepages_update_locked($code); };

    } else {
        eval { $run_qemu->() };
    }

    # $@ stems from whichever eval ran in the branches above
    if (my $err = $@) {
        # deactivate volumes if start fails
        eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
        warn $@ if $@;
        eval { cleanup_pci_devices($vmid, $conf) };
        warn $@ if $@;

        die "start failed: $err";
    }

    # re-reserve all PCI IDs now that we can know the actual VM PID
    my $pid = PVE::QemuServer::Helpers::vm_running_locally($vmid);
    eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, undef, $pid) };
    warn $@ if $@;

    print "migration listens on $migrate_uri\n" if $migrate_uri;
    $res->{migrate_uri} = $migrate_uri;

    if ($statefile && $statefile ne 'tcp' && $statefile ne 'unix') {
        eval { mon_cmd($vmid, "cont"); };
        warn $@ if $@;
    }

    # start nbd server for storage migration
    if (my $nbd = $migrate_opts->{nbd}) {
        my $nbd_protocol_version = $migrate_opts->{nbd_proto_version} // 0;

        my $migrate_storage_uri;
        # nbd_protocol_version > 0 for unix socket support
        # ($migration_type is only defaulted in the 'tcp' statefile branch above, so it
        # can still be undefined here; that case takes the TCP path below)
        if ($nbd_protocol_version > 0 && defined($migration_type) && $migration_type eq 'secure') {
            my $socket_path = "/run/qemu-server/$vmid\_nbd.migrate";
            mon_cmd($vmid, "nbd-server-start", addr => { type => 'unix', data => { path => $socket_path } } );
            $migrate_storage_uri = "nbd:unix:$socket_path";
        } else {
            my $nodename = nodename();
            my $localip = $get_migration_ip->($nodename);
            my $pfamily = PVE::Tools::get_host_address_family($nodename);
            my $storage_migrate_port = PVE::Tools::next_migrate_port($pfamily);

            mon_cmd($vmid, "nbd-server-start", addr => {
                type => 'inet',
                data => {
                    host => "${localip}",
                    port => "${storage_migrate_port}",
                },
            });
            $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
            $migrate_storage_uri = "nbd:${localip}:${storage_migrate_port}";
        }

        $res->{migrate_storage_uri} = $migrate_storage_uri;

        foreach my $opt (sort keys %$nbd) {
            my $drivestr = $nbd->{$opt}->{drivestr};
            my $volid = $nbd->{$opt}->{volid};
            mon_cmd($vmid, "nbd-server-add", device => "drive-$opt", writable => JSON::true );
            my $nbd_uri = "$migrate_storage_uri:exportname=drive-$opt";
            print "storage migration listens on $nbd_uri volume:$drivestr\n";
            print "re-using replicated volume: $opt - $volid\n"
                if $nbd->{$opt}->{replicated};

            $res->{drives}->{$opt} = $nbd->{$opt};
            $res->{drives}->{$opt}->{nbd_uri} = $nbd_uri;
        }
    }

    if ($migratedfrom) {
        eval {
            set_migration_caps($vmid);
        };
        warn $@ if $@;

        if ($spice_port) {
            print "spice listens on port $spice_port\n";
            $res->{spice_port} = $spice_port;
            if ($migrate_opts->{spice_ticket}) {
                mon_cmd($vmid, "set_password", protocol => 'spice', password =>
                    $migrate_opts->{spice_ticket});
                mon_cmd($vmid, "expire_password", protocol => 'spice', time => "+30");
            }
        }

    } else {
        mon_cmd($vmid, "balloon", value => $conf->{balloon}*1024*1024)
            if !$statefile && $conf->{balloon};

        foreach my $opt (keys %$conf) {
            next if $opt !~ m/^net\d+$/;
            my $nicconf = parse_net($conf->{$opt});
            qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down};
        }
        add_nets_bridge_fdb($conf, $vmid);
    }

    # guest stats polling is only set up when ballooning is not explicitly disabled
    mon_cmd($vmid, 'qom-set',
        path => "machine/peripheral/balloon0",
        property => "guest-stats-polling-interval",
        value => 2) if (!defined($conf->{balloon}) || $conf->{balloon});

    if ($resume) {
        print "Resumed VM, removing state\n";
        if (my $vmstate = $conf->{vmstate}) {
            PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
            PVE::Storage::vdisk_free($storecfg, $vmstate);
        }
        delete $conf->@{qw(lock vmstate runningmachine runningcpu)};
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-start');

    return $res;
}
5908
# Build the QEMU command line for a VM and return it as one string.
#
# $storecfg: storage configuration, passed through to config_to_command()
# $vmid:     ID of the VM whose configuration gets loaded
# $snapname: optional snapshot name; when set, the snapshot's configuration is
#            used instead of the current one
#
# Dies if $snapname is given but the configuration has no such snapshot.
sub vm_commandline {
    my ($storecfg, $vmid, $snapname) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    my $forcemachine;
    my $forcecpu;

    if ($snapname) {
        my $snapshot = $conf->{snapshots}->{$snapname}
            // die "snapshot '$snapname' does not exist\n";

        # a snapshot may override the machine type and the CPU
        ($forcemachine, $forcecpu) = $snapshot->@{qw(runningmachine runningcpu)};

        # keep the digest of the config file for the API
        $snapshot->{digest} = $conf->{digest};

        $conf = $snapshot;
    }

    my $cmd = config_to_command(
        $storecfg, $vmid, $conf, load_defaults(), $forcemachine, $forcecpu);

    return PVE::Tools::cmd2string($cmd);
}
5934
# Send a 'system_reset' monitor command to the VM while holding its config lock.
#
# $vmid:     ID of the VM
# $skiplock: if true, the lock recorded in the VM configuration is not checked
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $do_reset = sub {
        my $conf = PVE::QemuConfig->load_config($vmid);

        PVE::QemuConfig->check_lock($conf) unless $skiplock;

        mon_cmd($vmid, "system_reset");
    };

    PVE::QemuConfig->lock_config($vmid, $do_reset);
}
5947
# Collect the volume IDs referenced by a VM configuration.
#
# $conf: VM configuration, walked with foreach_volid()
#
# Returns an array reference with every volume ID that is not an absolute path
# and for which PVE::Storage::parse_volume_id() yields a storage ID.
sub get_vm_volumes {
    my ($conf) = @_;

    my @volids;
    foreach_volid($conf, sub {
        my ($volid) = @_;

        # skip absolute paths
        return if $volid =~ m|^/|;

        # second argument 1: called in a way that does not die on unparsable
        # IDs (presumably a 'noerr' flag - confirm in PVE::Storage)
        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        push @volids, $volid;
    });

    return \@volids;
}
5965
# Remove the mediated devices of a VM and drop its PCI reservation.
#
# $vmid: ID of the VM
# $conf: VM configuration; only its 'hostpciN' entries are looked at
sub cleanup_pci_devices {
    my ($vmid, $conf) = @_;

    for my $key (keys %$conf) {
        my ($index) = $key =~ m/^hostpci(\d+)$/ or next;

        my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $index);
        my $device = parse_hostpci($conf->{$key});
        next if !$device->{mdev};

        # NOTE: PVE::SysFSTools::pci_cleanup_mdev_device is avoided on purpose,
        # it requires the PCI ID and breaking ABI for this two liner is not
        # worth it
        my $sysfs_dir = "/sys/bus/mdev/devices/$uuid";
        PVE::SysFSTools::file_write("$sysfs_dir/remove", "1") if -e $sysfs_dir;
    }

    PVE::QemuServer::PCI::remove_pci_reservation($vmid);
}
5983
# Clean up after a VM was stopped. All errors are downgraded to warnings.
#
# $storecfg:              storage configuration
# $vmid:                  ID of the VM
# $conf:                  VM configuration
# $keepActive:            if true, volumes are neither deactivated nor unmapped
# $apply_pending_changes: if true, vmconfig_apply_pending() is called at the end
sub vm_stop_cleanup {
    my ($storecfg, $vmid, $conf, $keepActive, $apply_pending_changes) = @_;

    eval {
        unless ($keepActive) {
            PVE::Storage::deactivate_volumes($storecfg, get_vm_volumes($conf));

            my $tpmdrive = $conf->{tpmstate0};
            if ($tpmdrive) {
                my $tpm = parse_drive("tpmstate0", $tpmdrive);
                my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
                PVE::Storage::unmap_volume($storecfg, $tpm->{file}) if $storeid;
            }
        }

        # remove the per-VM runtime files
        unlink "/var/run/qemu-server/${vmid}.$_" for qw(mon qmp pid vnc qga);

        if ($conf->{ivshmem}) {
            my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
            # Just delete it for now. VMs which already have it open are not
            # affected, but new VMs will get a separate one. If this becomes an
            # issue, either add some sort of ref-counting or a "don't delete on
            # stop" flag to the ivshmem format.
            my $shm_name = $ivshmem->{name} // $vmid;
            unlink '/dev/shm/pve-shm-' . $shm_name;
        }

        cleanup_pci_devices($vmid, $conf);

        vmconfig_apply_pending($vmid, $conf, $storecfg) if $apply_pending_changes;
    };
    warn $@ if $@; # avoid errors - just warn
}
6021
# call only in locked context
#
# Stop or shut down a running VM and clean up afterwards.
#
# $storecfg:   storage configuration, handed to vm_stop_cleanup()
# $vmid:       ID of the VM
# $skiplock:   if true, the lock in the VM configuration is not checked
# $nocheck:    if true, the VM configuration is not loaded at all; in that case
#              no 'pre-stop' hookscript runs and no cleanup is done
# $timeout:    seconds to wait for the VM to stop; if undefined, the 'down'
#              value of the startup option is used for shutdowns, else 60
# $shutdown:   if true, ask the guest to power down instead of sending 'quit'
# $force:      if true, send SIGTERM (and later SIGKILL) instead of dying when
#              the command fails or times out
# $keepActive: handed to vm_stop_cleanup()
#
# Returns nothing useful. Dies if the stop fails or times out without $force.
sub _do_vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive) = @_;

    my $pid = check_running($vmid, $nocheck);
    return if !$pid;

    my $conf;
    if (!$nocheck) {
        $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->check_lock($conf) if !$skiplock;
        if (!defined($timeout) && $shutdown && $conf->{startup}) {
            my $opts = PVE::JSONSchema::pve_parse_startup_order($conf->{startup});
            $timeout = $opts->{down} if $opts->{down};
        }
        PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-stop');
    }

    # polls once per second while the VM is running, for at most $limit polls;
    # returns true if all $limit polls were used up
    my $wait_timed_out = sub {
        my ($limit) = @_;

        my $elapsed = 0;
        while (($elapsed < $limit) && check_running($vmid, $nocheck)) {
            $elapsed++;
            sleep 1;
        }

        return $elapsed >= $limit;
    };

    # without a loaded config (i.e. with $nocheck) there is nothing to clean up
    my $cleanup = sub {
        vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
    };

    eval {
        if (!$shutdown) {
            mon_cmd($vmid, "quit");
        } elsif (defined($conf) && get_qga_key($conf, 'enabled')) {
            mon_cmd($vmid, "guest-shutdown", timeout => $timeout);
        } else {
            mon_cmd($vmid, "system_powerdown");
        }
    };
    my $err = $@;

    if ($err) {
        if (!check_running($vmid, $nocheck)) {
            warn "Unexpected: VM shutdown command failed, but VM not running anymore..\n";
            return;
        }

        die "VM quit/powerdown failed\n" if !$force;

        warn "VM quit/powerdown failed - terminating now with SIGTERM\n";
        kill 15, $pid;
    } else {
        $timeout //= 60;

        if (!$wait_timed_out->($timeout)) {
            $cleanup->();
            return;
        }

        die "VM quit/powerdown failed - got timeout\n" if !$force;

        warn "VM still running - terminating now with SIGTERM\n";
        kill 15, $pid;
    }

    # wait again, this time for the SIGTERM to take effect
    if ($wait_timed_out->(10)) {
        warn "VM still running - terminating now with SIGKILL\n";
        kill 9, $pid;
        sleep 1;
    }

    $cleanup->();
}
6103
# Stop a VM.
#
# Note: use $nocheck to skip tests if the VM configuration file exists.
# We need that when migrating VMs to other nodes (files already moved).
# Note: we set $keepActive in vzdump stop mode - volumes need to stay active.
#
# If $migratedfrom is set, the process is sent SIGTERM right away and the
# cleanup uses the configuration loaded for that node; the config lock is not
# taken in that case. Otherwise _do_vm_stop() runs under the config lock.
# $force defaults to 1 unless $shutdown is requested.
sub vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive, $migratedfrom) = @_;

    $force //= 1 if !$shutdown;

    if (!$migratedfrom) {
        return PVE::QemuConfig->lock_config($vmid, sub {
            _do_vm_stop($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive);
        });
    }

    my $pid = check_running($vmid, $nocheck, $migratedfrom);
    kill 15, $pid if $pid;

    my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);
    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 0);

    return;
}
6124
# Reboot a running VM by registering a reboot request and shutting it down.
#
# $vmid:    ID of the VM
# $timeout: shutdown timeout, handed to _do_vm_stop()
#
# Does nothing if the VM is not running. On failure the reboot request is
# cleared again and the error is re-thrown.
sub vm_reboot {
    my ($vmid, $timeout) = @_;

    my $reboot = sub {
        eval {
            # only reboot if running, as qmeventd starts it again on a stop event
            if (check_running($vmid)) {
                create_reboot_request($vmid);

                my $storecfg = PVE::Storage::config();
                _do_vm_stop($storecfg, $vmid, undef, undef, $timeout, 1);
            }
        };
        if (my $err = $@) {
            # avoid that the next normal shutdown will be confused for a reboot
            clear_reboot_request($vmid);
            die $err;
        }
    };

    PVE::QemuConfig->lock_config($vmid, $reboot);
}
6147
# Suspend a VM, either by pausing it or by saving its state to a volume.
#
# note: if using the statestorage parameter, the caller has to check privileges
#
# $vmid:         ID of the VM
# $skiplock:     if true, the lock in the VM configuration is not checked
# $includestate: if true, the VM state is saved to a state volume and QEMU is
#                quit afterwards; otherwise only a 'stop' monitor command is sent
# $statestorage: storage for the state volume; if not set it is chosen with
#                find_vmstate_storage() and, unless called from the CLI, the
#                current user needs 'Datastore.AllocateSpace' on it
#
# Dies on a conflicting lock, when asked to save state during a backup, or
# when saving the state fails. In the last case the 'suspending' lock is left
# in place.
sub vm_suspend {
    my ($vmid, $skiplock, $includestate, $statestorage) = @_;

    my ($conf, $path, $storecfg, $vmstate);

    PVE::QemuConfig->lock_config($vmid, sub {
        $conf = PVE::QemuConfig->load_config($vmid);

        my $is_backing_up = PVE::QemuConfig->has_lock($conf, 'backup');
        PVE::QemuConfig->check_lock($conf) unless $skiplock || $is_backing_up;

        die "cannot suspend to disk during backup\n"
            if $is_backing_up && $includestate;

        if (!$includestate) {
            mon_cmd($vmid, "stop");
            return;
        }

        $conf->{lock} = 'suspending';
        my $date = strftime("%Y-%m-%d", localtime(time()));
        $storecfg = PVE::Storage::config();

        if (!$statestorage) {
            $statestorage = find_vmstate_storage($conf, $storecfg);

            # check permissions for the storage
            my $rpcenv = PVE::RPCEnvironment::get();
            if ($rpcenv->{type} ne 'cli') {
                my $authuser = $rpcenv->get_user();
                $rpcenv->check($authuser, "/storage/$statestorage", ['Datastore.AllocateSpace']);
            }
        }

        $vmstate = PVE::QemuConfig->__snapshot_save_vmstate(
            $vmid, $conf, "suspend-$date", $storecfg, $statestorage, 1);
        $path = PVE::Storage::path($storecfg, $vmstate);
        PVE::QemuConfig->write_config($vmid, $conf);
    });

    return if !$includestate;

    # save vm state
    PVE::Storage::activate_volumes($storecfg, [$vmstate]);

    eval {
        set_migration_caps($vmid, 1);
        mon_cmd($vmid, "savevm-start", statefile => $path);

        # poll until the state is saved or an error shows up
        while (1) {
            my $state = mon_cmd($vmid, "query-savevm");
            my $status = $state->{status};

            die "savevm not active\n" if !$status;

            if ($status eq 'active') {
                sleep(1);
                next;
            }

            if ($status eq 'completed') {
                print "State saved, quitting\n";
                last;
            }

            die "query-savevm failed with error '$state->{error}'\n"
                if $status eq 'failed' && $state->{error};

            die "query-savevm returned status '$state->{status}'\n";
        }
    };
    my $err = $@;

    PVE::QemuConfig->lock_config($vmid, sub {
        $conf = PVE::QemuConfig->load_config($vmid);

        if ($err) {
            # cleanup, but leave suspending lock, to indicate something went wrong
            eval {
                mon_cmd($vmid, "savevm-end");
                PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
                PVE::Storage::vdisk_free($storecfg, $vmstate);
                delete $conf->@{qw(vmstate runningmachine runningcpu)};
                PVE::QemuConfig->write_config($vmid, $conf);
            };
            warn $@ if $@;
            die $err;
        }

        die "lock changed unexpectedly\n"
            unless PVE::QemuConfig->has_lock($conf, 'suspending');

        mon_cmd($vmid, "quit");
        $conf->{lock} = 'suspended';
        PVE::QemuConfig->write_config($vmid, $conf);
    });
}
6242
# Resume a paused or suspended VM while holding its config lock.
#
# $vmid:     ID of the VM
# $skiplock: if true, the lock in the VM configuration is not checked
# $nocheck:  if true, the lock check is skipped as well (the configuration is
#            still loaded)
#
# The monitor command depends on the status reported by 'query-status':
# nothing is done for 'running', 'system_wakeup' is sent for 'suspended' and
# 'cont' otherwise. A VM in status 'shutdown' is reset first.
sub vm_resume {
    my ($vmid, $skiplock, $nocheck) = @_;

    PVE::QemuConfig->lock_config($vmid, sub {
        my $res = mon_cmd($vmid, 'query-status');
        my $conf = PVE::QemuConfig->load_config($vmid);

        my $status = $res->{status} // '';
        return if $status eq 'running'; # job done, go home

        my $resume_cmd = $status eq 'suspended' ? 'system_wakeup' : 'cont';
        my $reset = $status eq 'shutdown';

        # a 'backup' lock does not prevent resuming
        if (!$nocheck && !$skiplock && !PVE::QemuConfig->has_lock($conf, 'backup')) {
            PVE::QemuConfig->check_lock($conf);
        }

        # required if a VM shuts down during a backup and we get a resume
        # request before the backup finishes for example
        mon_cmd($vmid, "system_reset") if $reset;

        add_nets_bridge_fdb($conf, $vmid) if $resume_cmd eq 'cont';

        mon_cmd($vmid, $resume_cmd);
    });
}
6275
# Send a key event to the VM while holding its config lock.
#
# $vmid:     ID of the VM
# $skiplock: accepted, but not used by this function
# $key:      key name, appended to the 'sendkey' human monitor command
#
# Dies with the monitor output if that output is not empty.
sub vm_sendkey {
    my ($vmid, $skiplock, $key) = @_;

    my $send = sub {
        # the result is not used below, the configuration is only loaded
        my $conf = PVE::QemuConfig->load_config($vmid);

        # there is no qmp command, so we use the human monitor command
        my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "sendkey $key");
        die $output if $output ne '';
    };

    PVE::QemuConfig->lock_config($vmid, $send);
}
6288
# vzdump restore implementation
6290
# Return the name of the first entry listed by 'tar tf' for an archive.
#
# $archive: path to the archive; must be a plain file
#
# Dies if the file does not exist, if tar cannot be started, or if the listing
# yields no (or a false) first line. The tar process is sent SIGTERM as soon as
# the first line was read.
sub tar_archive_read_firstfile {
    my ($archive) = @_;

    die "ERROR: file '$archive' does not exist\n" unless -f $archive;

    # list the archive, only the first line of the output is read
    my $pid = open(my $fh, '-|', 'tar', 'tf', $archive);
    die "unable to open file '$archive'\n" if !$pid;

    my $firstfile = <$fh>;
    kill 15, $pid;
    close $fh;

    die "ERROR: archive contaions no data\n" if !$firstfile;
    chomp $firstfile;

    return $firstfile;
}
6308
# Remove the temporary volumes recorded in a restore stat file.
#
# $storecfg: storage configuration, used to free storage-managed volumes
# $statfile: path of the stat file; each line is expected to end in
#            'vzdump:<name>:<volid>'
#
# Volume IDs that are absolute paths are unlinked, all others are freed via
# PVE::Storage::vdisk_free(). Failures are reported on STDERR and never abort
# the cleanup. If the stat file cannot be opened, nothing is done.
sub tar_restore_cleanup {
    my ($storecfg, $statfile) = @_;

    print STDERR "starting cleanup\n";

    if (my $fd = IO::File->new($statfile, "r")) {
        while (defined(my $line = <$fd>)) {
            if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                my $volid = $2;
                eval {
                    if ($volid =~ m|^/|) {
                        # parentheses are required: 'unlink $volid || die'
                        # would never evaluate the 'die'
                        unlink($volid) or die "unlink failed - $!\n";
                    } else {
                        PVE::Storage::vdisk_free($storecfg, $volid);
                    }
                    print STDERR "temporary volume '$volid' sucessfuly removed\n";
                };
                print STDERR "unable to cleanup '$volid' - $@" if $@;
            } else {
                print STDERR "unable to parse line in statfile - $line";
            }
        }
        $fd->close();
    }
}
6334
# Restore a VM from a backup archive, dispatching on the archive format.
#
# $archive: archive path; '-' is handed to restore_vma_archive() directly,
#           without looking at the archive
# $vmid:    ID of the VM to restore
# $user:    user on whose behalf the restore runs
# $opts:    restore options; $opts->{format} overrides the detected format
#
# Returns whatever the format specific restore function returns.
sub restore_file_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if ($archive eq '-') {
        return restore_vma_archive($archive, $vmid, $user, $opts);
    }

    # try to detect archive format
    my $info = PVE::Storage::archive_info($archive);
    my $format = $opts->{format} // $info->{format};

    return restore_tar_archive($archive, $vmid, $user, $opts) if $format eq 'tar';

    return restore_vma_archive($archive, $vmid, $user, $opts, $info->{compression});
}
6352
# Helper to remove disks that will not be used after restore
#
# $storecfg:     Storage configuration
# $vmid:         ID of the VM that gets restored
# $oldconf:      configuration of the VM before the restore
# $virtdev_hash: drives contained in the backup, keyed by drive name
#
# Volumes owned by $vmid whose drive is part of the backup are freed. The
# other owned volumes are kept, but the old snapshots are removed from them.
# State volumes of the old snapshots are freed too. Errors only cause warnings.
my $restore_cleanup_oldconf = sub {
    my ($storecfg, $vmid, $oldconf, $virtdev_hash) = @_;

    my $kept_disks = {};

    PVE::QemuConfig->foreach_volume($oldconf, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        # Note: only delete disk we want to restore
        # other volumes will become unused
        if (!$virtdev_hash->{$ds}) {
            $kept_disks->{$volid} = 1;
            return;
        }

        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        warn $@ if $@;
    });

    # after the restore we have no snapshots anymore
    for my $snapname (keys $oldconf->{snapshots}->%*) {
        my $vmstate = $oldconf->{snapshots}->{$snapname}->{vmstate};
        if ($vmstate) {
            eval { PVE::Storage::vdisk_free($storecfg, $vmstate); };
            warn $@ if $@;
        }

        for my $volid (keys $kept_disks->%*) {
            eval { PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snapname); };
            warn $@ if $@;
        }
    }
};
6398
# Helper to parse vzdump backup device hints
#
# $rpcenv: Environment, used to check storage permissions
# $user: User ID, to check storage permissions
# $storecfg: Storage configuration
# $fh: the file handle for reading the configuration
# $devinfo: should contain device sizes for all backed-up devices
# $options: backup options (pool, default storage)
#
# Return: $virtdev_hash, updates $devinfo (add devname, virtdev, format, storeid)
#
# Dies if a mapped drive has no entry in $devinfo, or if the target storage
# does not allow images or may not be used by $user.
my $parse_backup_hints = sub {
    my ($rpcenv, $user, $storecfg, $fh, $devinfo, $options) = @_;

    # assert that an image can be allocated on the storage
    my $check_storage = sub {
        my ($storeid, $scfg) = @_;

        die "Content type 'images' is not available on storage '$storeid'\n"
            if !$scfg->{content}->{images};

        return if $user eq 'root@pam';
        $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace']);
    };

    my $virtdev_hash = {};
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\#qmdump\#map:(\S+):(\S+):(\S*):(\S*):$/) {
            my ($virtdev, $devname, $storeid, $format) = ($1, $2, $3, $4);
            my $info = $devinfo->{$devname}
                or die "archive does not contain data for drive '$virtdev'\n";

            # a storage given in the options wins, 'local' is the last resort
            $storeid = $options->{storage} if defined($options->{storage});
            $storeid ||= 'local';
            $format ||= 'raw';

            $info->@{qw(devname virtdev format storeid)} =
                ($devname, $virtdev, $format, $storeid);

            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $check_storage->($storeid, $scfg); # permission and content type check

            $virtdev_hash->{$virtdev} = $info;
            next;
        }

        my ($virtdev, $drivestr) = $line =~ m/^((?:ide|sata|scsi)\d+):\s*(.*)\s*$/
            or next;

        # of the regular drive lines only cloudinit drives are of interest
        my $drive = parse_drive($virtdev, $drivestr);
        next if !drive_is_cloudinit($drive);

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        $storeid = $options->{storage} if defined ($options->{storage});
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        my $format = qemu_img_format($scfg, $volname); # has 'raw' fallback

        $check_storage->($storeid, $scfg); # permission and content type check

        $virtdev_hash->{$virtdev} = {
            format => $format,
            storeid => $storeid,
            size => PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE,
            is_cloudinit => 1,
        };
    }

    return $virtdev_hash;
};
6466
# Helper to allocate and activate all volumes required for a restore
#
# $storecfg: Storage configuration
# $virtdev_hash: as returned by parse_backup_hints()
# $vmid: ID of the VM the volumes are allocated for
#
# Each entry of $virtdev_hash gets its 'volid' set; its 'format' is replaced
# by the storage default if the storage does not support the requested one.
#
# Returns: { $virtdev => $volid }
my $restore_allocate_devices = sub {
    my ($storecfg, $virtdev_hash, $vmid) = @_;

    my $map = {};
    foreach my $virtdev (sort keys %$virtdev_hash) {
        my $d = $virtdev_hash->{$virtdev};
        # round up to full 1024-unit blocks (presumably bytes -> KiB - confirm
        # against PVE::Storage::vdisk_alloc)
        my $alloc_size = int(($d->{size} + 1024 - 1)/1024);
        my $storeid = $d->{storeid};
        # looked up once per device; also used for the cloudinit name below
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        # test if requested format is supported
        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        my $supported = grep { $_ eq $d->{format} } @$validFormats;
        $d->{format} = $defFormat if !$supported;

        # only cloudinit volumes get an explicit name; on storages with a
        # 'path' the name carries the format as suffix
        my $name;
        if ($d->{is_cloudinit}) {
            $name = "vm-$vmid-cloudinit";
            $name .= ".$d->{format}" if $scfg->{path};
        }

        my $volid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $vmid, $d->{format}, $name, $alloc_size);

        print STDERR "new volume ID is '$volid'\n";
        $d->{volid} = $volid;

        PVE::Storage::activate_volumes($storecfg, [$volid]);

        $map->{$virtdev} = $volid;
    }

    return $map;
};
6510
# Translate one line of a backed-up VM configuration for the restored VM.
#
# $cookie: state shared between calls; $cookie->{netcount} is the index of the
#          next 'netN' entry created from an old 'vlanN' line
# $map:    { $virtdev => $volid } of the newly allocated volumes
# $line:   the configuration line, including its trailing newline
# $unique: if true, MAC addresses and the SMBIOS UUID are regenerated
#
# Returns the text to put into the new configuration. This is the empty string
# for lines that get dropped, and may consist of several lines.
sub restore_update_config_line {
    my ($cookie, $map, $line, $unique) = @_;

    # backup hints, locks, unused volumes and snapshot parents are dropped
    return '' if $line =~ m/^(?:\#qmdump\#|\#vzdump\#|lock:|unused\d+:|parent:)/;

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');

    if ($line =~ m/^(vlan(\d+)):\s*(\S+)\s*$/) {
        # try to convert old 1.X settings
        my ($ind, $ethcfg) = ($2, $3);

        my $converted = '';
        foreach my $devconfig (PVE::Tools::split_list($ethcfg)) {
            my ($model, $macaddr) = split(/\=/, $devconfig);
            $macaddr = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if !$macaddr || $unique;

            my $netstr = print_net({
                model => $model,
                bridge => "vmbr$ind",
                macaddr => $macaddr,
            });

            $converted .= "net$cookie->{netcount}: $netstr\n";
            $cookie->{netcount}++;
        }
        return $converted;
    }

    if ($unique && $line =~ m/^(net\d+):\s*(\S+)\s*$/) {
        my ($id, $netstr) = ($1, $2);

        # only an existing MAC address gets replaced
        my $net = parse_net($netstr);
        $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if $net->{macaddr};

        return "$id: " . print_net($net) . "\n";
    }

    if ($line =~ m/^((ide|scsi|virtio|sata|efidisk|tpmstate)\d+):\s*(\S+)\s*$/) {
        my ($virtdev, $value) = ($1, $3);
        my $di = parse_drive($virtdev, $value);

        # drives explicitly excluded from backup are commented out
        return "#$line" if defined($di->{backup}) && !$di->{backup};

        # drives without a newly allocated volume stay as they are
        return $line if !$map->{$virtdev};

        delete $di->{format}; # format can change on restore
        $di->{file} = $map->{$virtdev};

        return "$virtdev: " . print_drive($di) . "\n";
    }

    if ($line =~ m/^vmgenid: (.*)/) {
        my $vmgenid = $1;
        # always generate a new vmgenid if there was a valid one setup
        $vmgenid = generate_uuid() if $vmgenid ne '0';

        return "vmgenid: $vmgenid\n";
    }

    if ($unique && $line =~ m/^(smbios1: )(.*)/) {
        my ($key, $smbios1_str) = ($1, $2);

        my ($uuid, $uuid_str);
        UUID::generate($uuid);
        UUID::unparse($uuid, $uuid_str);

        my $smbios1 = parse_smbios1($smbios1_str);
        $smbios1->{uuid} = $uuid_str;

        return $key . print_smbios1($smbios1) . "\n";
    }

    return $line;
}
6579
# Deactivate all volumes that were allocated for a restore.
#
# $storecfg:     Storage configuration
# $virtdev_hash: restored drives; entries without a 'volid' are ignored
#
# Errors are printed to STDERR and not propagated.
my $restore_deactivate_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    my $vollist = [ grep { $_ } map { $_->{volid} } values $virtdev_hash->%* ];

    eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
    print STDERR $@ if $@;
};
6591
# Free all volumes that were allocated for a restore.
#
# $storecfg:     Storage configuration
# $virtdev_hash: restored drives; entries without a 'volid' are ignored
#
# The outcome for each volume is reported on STDERR; a failure does not stop
# the remaining volumes from being freed.
my $restore_destroy_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    my @volids = grep { $_ } map { $_->{volid} } values $virtdev_hash->%*;

    for my $volid (@volids) {
        eval {
            PVE::Storage::vdisk_free($storecfg, $volid);
            print STDERR "temporary volume '$volid' sucessfuly removed\n";
        };
        print STDERR "unable to cleanup '$volid' - $@" if $@;
    }
};
6604
# Parse a raw backup configuration and apply overrides on top of it.
#
# $filename:        file name handed to parse_vm_config()
# $backup_conf_raw: raw configuration text from the backup
# $override_conf:   hash of options; each key replaces the parsed value
#
# Returns the merged configuration hash.
my $restore_merge_config = sub {
    my ($filename, $backup_conf_raw, $override_conf) = @_;

    my $merged = parse_vm_config($filename, $backup_conf_raw);
    $merged->{$_} = $override_conf->{$_} for keys $override_conf->%*;

    return $merged;
};
6615
# List the image volumes known to the storage layer, keyed by volume ID.
#
# $cfg:  storage configuration
# $vmid: handed to PVE::Storage::vdisk_list() to restrict the listing; may be
#        undefined
#
# Returns a hash { $volid => $item }. Items without a volume ID or a size are
# left out; every returned item additionally gets its 'path' set.
sub scan_volids {
    my ($cfg, $vmid) = @_;

    my $info = PVE::Storage::vdisk_list($cfg, undef, $vmid, undef, 'images');

    my $volid_hash = {};
    for my $items (values %$info) {
        for my $item (@$items) {
            next unless $item->{volid} && $item->{size};

            $item->{path} = PVE::Storage::path($cfg, $item->{volid});
            $volid_hash->{$item->{volid}} = $item;
        }
    }

    return $volid_hash;
}
6632
# Bring the disk entries of a VM configuration in line with the existing volumes.
#
# $vmid:       ID of the VM
# $conf:       VM configuration, modified in place
# $volid_hash: { $volid => $info } of the volumes to consider; each info is
#              expected to provide 'size' and 'path'
#
# Sizes of used drives are updated, 'unusedN' entries of volumes that are in
# use are removed, and volumes not referenced at all are added as unused.
# Every change is printed.
#
# Returns 1 if $conf was changed and undef otherwise.
sub update_disk_config {
    my ($vmid, $conf, $volid_hash) = @_;

    my $changes;
    my $prefix = "VM $vmid";

    # used and unused disks
    #
    # Note: it is allowed to define multiple storages with same path (alias), so
    # we need to check both 'volid' and real 'path' (two different volid can point
    # to the same path).
    my (%seen_volid, %seen_path);

    # update size info
    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file} or return;
        my $volume = $volid_hash->{$volid};

        # mark volid as "in-use" for next step
        $seen_volid{$volid} = 1;
        $seen_path{$volume->{path}} = 1 if $volume && $volume->{path};

        return if drive_is_cdrom($drive) || !$volume;

        my ($updated, $msg) = PVE::QemuServer::Drive::update_disksize($drive, $volume->{size});
        return if !defined($updated);

        $changes = 1;
        $conf->{$opt} = print_drive($updated);
        print "$prefix ($opt): $msg\n";
    });

    # remove 'unusedX' entry if volume is used
    PVE::QemuConfig->foreach_unused_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file} or return;

        my $volume = $volid_hash->{$volid};
        my $path = $volume ? $volume->{path} : undef;

        if ($seen_volid{$volid} || ($path && $seen_path{$path})) {
            print "$prefix remove entry '$opt', its volume '$volid' is in use\n";
            $changes = 1;
            delete $conf->{$opt};
        }

        $seen_volid{$volid} = 1;
        $seen_path{$path} = 1 if $path;
    });

    # add volumes that are not referenced at all; state volumes are left alone
    for my $volid (sort keys %$volid_hash) {
        next if $volid =~ m/vm-$vmid-state-/ || $seen_volid{$volid};

        my $path = $volid_hash->{$volid}->{path};
        next if !$path; # just to be sure
        next if $seen_path{$path};

        $changes = 1;
        my $key = PVE::QemuConfig->add_unused_volume($conf, $volid);
        print "$prefix add unreferenced volume '$volid' as '$key' to config\n";
        $seen_path{$path} = 1; # avoid to add more than once (aliases)
    }

    return $changes;
}
6706
# Rescan the storages and update the disk entries of one or all VMs.
#
# $vmid:   ID of the VM to update; if undefined, all VMs returned by
#          config_list() are updated
# $nolock: if true, the configuration is updated without taking the config lock
# $dryrun: if true, changes are printed but the configuration is not written
sub rescan {
    my ($vmid, $nolock, $dryrun) = @_;

    my $cfg = PVE::Storage::config();

    print "rescan volumes...\n";
    my $volid_hash = scan_volids($cfg, $vmid);

    my $updatefn = sub {
        my ($id) = @_;

        my $conf = PVE::QemuConfig->load_config($id);

        PVE::QemuConfig->check_lock($conf);

        # only consider the volumes that belong to this VM
        my $vm_volids = {};
        for my $volid (keys %$volid_hash) {
            my $info = $volid_hash->{$volid};
            next unless $info->{vmid} && $info->{vmid} == $id;
            $vm_volids->{$volid} = $info;
        }

        my $changes = update_disk_config($id, $conf, $vm_volids);

        PVE::QemuConfig->write_config($id, $conf) if $changes && !$dryrun;
    };

    my @vmids = defined($vmid) ? ($vmid) : keys %{ config_list() };

    for my $id (@vmids) {
        if ($nolock) {
            $updatefn->($id);
        } else {
            PVE::QemuConfig->lock_config($id, $updatefn, $id);
        }
    }
}
6750
# Restore a VM from a Proxmox Backup Server snapshot.
#
# $archive: volume ID of the backup; must parse as vtype 'backup' with format
#           'pbs-vm'
# $vmid:    ID of the VM to restore
# $user:    user ID, used for the storage permission checks
# $options: restore options; this function reads 'storage', 'unique', 'live',
#           'override_conf' and 'pool'
#
# The index, the VM configuration and (if present) the firewall configuration
# are fetched into a temporary directory. Then the target volumes are allocated
# and filled with 'pbs-restore', and the new configuration is written. With
# $options->{live} only 'efidisk0' and 'tpmstate0' are restored up front, the
# other drives are left to pbs_live_restore().
#
# On error all volumes allocated so far are freed and the error is re-thrown.
sub restore_proxmox_backup_archive {
    my ($archive, $vmid, $user, $options) = @_;

    my $storecfg = PVE::Storage::config();

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($archive);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

    my $fingerprint = $scfg->{fingerprint};
    my $keyfile = PVE::Storage::PBSPlugin::pbs_encryption_key_file_name($storecfg, $storeid);

    my $repo = PVE::PBSClient::get_repository($scfg);
    my $namespace = $scfg->{namespace};

    # This is only used for `pbs-restore` and the QEMU PBS driver (live-restore)
    local $ENV{PBS_PASSWORD} = PVE::Storage::PBSPlugin::pbs_get_password($scfg, $storeid);
    local $ENV{PBS_FINGERPRINT} = $fingerprint if defined($fingerprint);

    my @parsed_volname = PVE::Storage::parse_volname($storecfg, $archive);
    my ($vtype, $pbs_backup_name, $format) = @parsed_volname[0, 1, 6];

    die "got unexpected vtype '$vtype'\n" if $vtype ne 'backup';

    die "got unexpected backup format '$format'\n" if $format ne 'pbs-vm';

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree($tmpdir);
    mkpath($tmpdir);

    my $conffile = PVE::QemuConfig->config_file($vmid);

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    # Note: $oldconf is undef if VM does not exist
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my $rpcenv = PVE::RPCEnvironment::get();
    my $devinfo = {}; # info about drives included in backup
    my $virtdev_hash = {}; # info about allocated drives

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        my $cfgfn = "$tmpdir/qemu-server.conf";
        my $firewall_config_fn = "$tmpdir/fw.conf";
        my $index_fn = "$tmpdir/index.json";

        # fetch a single file of the backup snapshot into $target
        my $fetch = sub {
            my ($name, $target) = @_;
            PVE::Storage::PBSPlugin::run_raw_client_cmd(
                $scfg, $storeid, "restore", [$pbs_backup_name, $name, $target]);
        };

        $fetch->("index.json", $index_fn);
        my $index = decode_json(PVE::Tools::file_get_contents($index_fn));

        # remember the size of every drive image listed in the index
        for my $info (@{$index->{files}}) {
            my ($devname) = $info->{filename} =~ m/^(drive-\S+).img.fidx$/ or next;

            my ($size) = $info->{size} =~ m/^(\d+)$/ # untaint size
                or die "unable to parse file size in 'index.json' - got '$info->{size}'\n";

            $devinfo->{$devname}->{size} = $size;
        }

        my %in_backup = map { $_->{filename} => 1 } @{$index->{files}};

        die "backup does not look like a qemu-server backup (missing 'qemu-server.conf' file)\n"
            if !$in_backup{'qemu-server.conf.blob'};

        $fetch->("qemu-server.conf", $cfgfn);

        if ($in_backup{'fw.conf.blob'}) {
            $fetch->("fw.conf", $firewall_config_fn);

            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($firewall_config_fn, "${pve_firewall_dir}/$vmid.fw");
        }

        my $fh = IO::File->new($cfgfn, "r")
            or die "unable to read qemu-server.conf - $!\n";

        $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $storecfg, $fh, $devinfo, $options);

        # fixme: rate limit?

        # create empty/temp config
        PVE::Tools::file_set_contents($conffile, "memory: 128\nlock: create");

        $restore_cleanup_oldconf->($storecfg, $vmid, $oldconf, $virtdev_hash) if $oldconf;

        # allocate volumes
        my $map = $restore_allocate_devices->($storecfg, $virtdev_hash, $vmid);

        # the namespace argument is the same for every pbs-restore call
        my @ns_arg = defined($namespace) ? ('--ns', $namespace) : ();

        for my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            # this fails if storage is unavailable
            my $volid = $d->{volid};
            my $path = PVE::Storage::path($storecfg, $volid);

            # for live-restore we only want to preload the efidisk and TPM state
            next if $options->{live} && $virtdev ne 'efidisk0' && $virtdev ne 'tpmstate0';

            my $pbs_restore_cmd = [
                '/usr/bin/pbs-restore',
                '--repository', $repo,
                @ns_arg,
                $pbs_backup_name,
                "$d->{devname}.img.fidx",
                $path,
                '--verbose',
            ];

            push @$pbs_restore_cmd, '--format', $d->{format} if $d->{format};
            push @$pbs_restore_cmd, '--keyfile', $keyfile if -e $keyfile;
            push @$pbs_restore_cmd, '--skip-zero'
                if PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $volid);

            my $dbg_cmdstring = PVE::Tools::cmd2string($pbs_restore_cmd);
            print "restore proxmox backup image: $dbg_cmdstring\n";
            run_command($pbs_restore_cmd);
        }

        # second pass over the backed-up config, now to build the new one
        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line($cookie, $map, $line, $options->{unique});
        }

        $fh->close();
    };
    my $err = $@;

    # for a successful live-restore the volumes have to stay active
    $restore_deactivate_volumes->($storecfg, $virtdev_hash) if $err || !$options->{live};

    rmtree($tmpdir);

    if ($err) {
        $restore_destroy_volumes->($storecfg, $virtdev_hash);
        die $err;
    }

    # keep lock during live-restore
    $new_conf_raw .= "\nlock: create" if $options->{live};

    my $new_conf = $restore_merge_config->($conffile, $new_conf_raw, $options->{override_conf});
    PVE::QemuConfig->write_config($vmid, $new_conf);

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $options->{pool}) if $options->{pool};

    if ($options->{live}) {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "got signal ($!) - abort\n"; };

        my $conf = PVE::QemuConfig->load_config($vmid);
        die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf);

        # these special drives are already restored before start
        delete $devinfo->@{qw(drive-efidisk0 drive-tpmstate0-backup)};

        my $pbs_opts = {
            repo => $repo,
            keyfile => $keyfile,
            snapshot => $pbs_backup_name,
            namespace => $namespace,
        };
        pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $pbs_opts);

        PVE::QemuConfig->remove_lock($vmid, "create");
    }
}
6966
# Start the VM paused with every restored disk backed by its archive on the
# Proxmox Backup Server, stream the data into the target disks while the guest
# runs, and finally detach the PBS backing nodes.
#
# Parameters:
#   $vmid           - ID of the VM being restored
#   $conf           - VM configuration hash (already loaded by the caller)
#   $storecfg       - storage configuration
#   $restored_disks - hash keyed by QEMU device name ('drive-<confname>')
#   $opts           - hash with 'repo', 'snapshot' and optionally 'keyfile'
#                     and 'namespace'
#
# Dies with "live-restore failed\n" after stopping the VM when anything inside
# the start/stream phase fails; dies early (before the VM is started) when a
# device name in $restored_disks does not have the expected 'drive-' prefix.
sub pbs_live_restore {
    my ($vmid, $conf, $storecfg, $restored_disks, $opts) = @_;

    print "starting VM for live-restore\n";
    print "repository: '$opts->{repo}', snapshot: '$opts->{snapshot}'\n";

    my $keyfile = $opts->{keyfile};
    my $use_keyfile = defined($keyfile) && -e $keyfile;

    my $pbs_backing = {};
    for my $ds (sort keys %$restored_disks) {
        # never rely on a stale $1 from an earlier match - a device name that
        # does not follow the 'drive-<confname>' scheme is a hard error
        my ($confname) = $ds =~ m/^drive-(.*)$/
            or die "unexpected device name '$ds' for live-restore - expected 'drive-<name>'\n";

        $pbs_backing->{$confname} = {
            repository => $opts->{repo},
            snapshot => $opts->{snapshot},
            archive => "$ds.img.fidx",
        };
        $pbs_backing->{$confname}->{keyfile} = $keyfile if $use_keyfile;
        $pbs_backing->{$confname}->{namespace} = $opts->{namespace} if defined($opts->{namespace});

        my $drive = parse_drive($confname, $conf->{$confname});
        print "restoring '$ds' to '$drive->{file}'\n";
    }

    my $drives_streamed = 0;
    eval {
        # make sure HA doesn't interrupt our restore by stopping the VM
        if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
            run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
        }

        # start VM with backing chain pointing to PBS backup, environment vars for PBS driver
        # in QEMU (PBS_PASSWORD and PBS_FINGERPRINT) are already set by our caller
        vm_start_nolock($storecfg, $vmid, $conf, {paused => 1, 'pbs-backing' => $pbs_backing}, {});

        my $qmeventd_fd = register_qmeventd_handle($vmid);

        # begin streaming, i.e. data copy from PBS to target disk for every vol,
        # this will effectively collapse the backing image chain consisting of
        # [target <- alloc-track -> PBS snapshot] to just [target] (alloc-track
        # removes itself once all backing images vanish with 'auto-remove=on')
        my $jobs = {};
        for my $ds (sort keys %$restored_disks) {
            my $job_id = "restore-$ds";
            mon_cmd($vmid, 'block-stream',
                'job-id' => $job_id,
                device => "$ds",
            );
            $jobs->{$job_id} = {};
        }

        mon_cmd($vmid, 'cont');
        qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');

        print "restore-drive jobs finished successfully, removing all tracking block devices"
            ." to disconnect from Proxmox Backup Server\n";

        for my $ds (sort keys %$restored_disks) {
            mon_cmd($vmid, 'blockdev-del', 'node-name' => "$ds-pbs");
        }

        close($qmeventd_fd);
    };

    my $err = $@;

    if ($err) {
        warn "An error occurred during live-restore: $err\n";
        _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
        die "live-restore failed\n";
    }
}
7037
# Restore a VM from a VMA archive.
#
# Builds a command pipeline (optional rate limiter, optional decompressor,
# 'vma extract'), runs it, and talks to 'vma' through a fifo: as soon as vma
# has printed the device list and the 'CTIME:' line, the target volumes are
# allocated and the device map is written to the fifo. Afterwards the config
# from the archive is rewritten, merged and stored for $vmid.
#
# Parameters:
#   $archive - path of the archive, or '-' to read it from STDIN
#   $vmid    - ID of the VM to restore to
#   $user    - user the restore is performed for (passed to the backup hint parser)
#   $opts    - hash of options; 'bwlimit', 'unique', 'override_conf' and 'pool'
#              are read here, the whole hash is handed to the hint parser
#   $comp    - compression of the archive (false if uncompressed)
#
# On any error the already allocated volumes are destroyed and the error is
# re-thrown; the fifo and the temporary directory are removed in both cases.
sub restore_vma_archive {
    my ($archive, $vmid, $user, $opts, $comp) = @_;

    my $readfrom = $archive;

    my $cfg = PVE::Storage::config();
    my $commands = [];
    my $bwlimit = $opts->{bwlimit};

    my $dbg_cmdstring = '';
    # append a command to the pipeline; every later stage reads from STDIN
    my $add_pipe = sub {
        my ($cmd) = @_;
        push @$commands, $cmd;
        $dbg_cmdstring .= ' | ' if length($dbg_cmdstring);
        $dbg_cmdstring .= PVE::Tools::cmd2string($cmd);
        $readfrom = '-';
    };

    my $input = undef;
    if ($archive eq '-') {
        $input = '<&STDIN';
    } else {
        # If we use a backup from a PVE defined storage we also consider that
        # storage's rate limit:
        my (undef, $volid) = PVE::Storage::path_to_volume_id($cfg, $archive);
        if (defined($volid)) {
            my ($sid, undef) = PVE::Storage::parse_volume_id($volid);
            my $readlimit = PVE::Storage::get_bandwidth_limit('restore', [$sid], $bwlimit);
            if ($readlimit) {
                print STDERR "applying read rate limit: $readlimit\n";
                my $cstream = ['cstream', '-t', $readlimit*1024, '--', $readfrom];
                $add_pipe->($cstream);
            }
        }
    }

    if ($comp) {
        my $info = PVE::Storage::decompressor_info('vma', $comp);
        my $cmd = $info->{decompressor};
        push @$cmd, $readfrom;
        $add_pipe->($cmd);
    }

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { warn "got interrupt - ignored\n"; };

    my $mapfifo = "/var/tmp/vzdumptmp$$.fifo";
    # get rid of a stale leftover first (same idea as the rmtree above), then
    # insist on a real fifo - otherwise the open() below would silently create
    # a regular file and vma would never see a usable device map
    unlink $mapfifo;
    POSIX::mkfifo($mapfifo, 0600)
        or die "unable to create fifo '$mapfifo' - $!\n";
    my $fifofh;
    my $openfifo = sub { open($fifofh, '>', $mapfifo) or die $! };

    $add_pipe->(['vma', 'extract', '-v', '-r', $mapfifo, $readfrom, $tmpdir]);

    my $oldtimeout;
    my $timeout = 5;

    my $devinfo = {}; # info about drives included in backup
    my $virtdev_hash = {}; # info about allocated drives

    my $rpcenv = PVE::RPCEnvironment::get();

    my $conffile = PVE::QemuConfig->config_file($vmid);

    # Note: $oldconf is undef if VM does not exist
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my %storage_limits;

    # called once vma has reported all devices: allocates the target volumes,
    # writes the device map to the fifo and collects the rewritten config
    my $print_devmap = sub {
        my $cfgfn = "$tmpdir/qemu-server.conf";

        # we can read the config - that is already extracted
        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        my $fwcfgfn = "$tmpdir/qemu-server.fw";
        if (-f $fwcfgfn) {
            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($fwcfgfn, "${pve_firewall_dir}/$vmid.fw");
        }

        $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $cfg, $fh, $devinfo, $opts);

        foreach my $info (values %{$virtdev_hash}) {
            my $storeid = $info->{storeid};
            next if defined($storage_limits{$storeid});

            my $limit = PVE::Storage::get_bandwidth_limit('restore', [$storeid], $bwlimit) // 0;
            print STDERR "rate limit for storage $storeid: $limit KiB/s\n" if $limit;
            $storage_limits{$storeid} = $limit * 1024;
        }

        foreach my $devname (keys %$devinfo) {
            die "found no device mapping information for device '$devname'\n"
                if !$devinfo->{$devname}->{virtdev};
        }

        # create empty/temp config
        if ($oldconf) {
            PVE::Tools::file_set_contents($conffile, "memory: 128\n");
            $restore_cleanup_oldconf->($cfg, $vmid, $oldconf, $virtdev_hash);
        }

        # allocate volumes
        my $map = $restore_allocate_devices->($cfg, $virtdev_hash, $vmid);

        # print restore information to $fifofh
        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            my $storeid = $d->{storeid};
            my $volid = $d->{volid};

            my $map_opts = '';
            if (my $limit = $storage_limits{$storeid}) {
                $map_opts .= "throttling.bps=$limit:throttling.group=$storeid:";
            }

            my $write_zeros = 1;
            if (PVE::Storage::volume_has_feature($cfg, 'sparseinit', $volid)) {
                $write_zeros = 0;
            }

            my $path = PVE::Storage::path($cfg, $volid);

            print $fifofh "${map_opts}format=$d->{format}:${write_zeros}:$d->{devname}=$path\n";

            print "map '$d->{devname}' to '$path' (write zeros = ${write_zeros})\n";
        }

        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $fh->close();
    };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };
        local $SIG{ALRM} = sub { die "got timeout\n"; };

        $oldtimeout = alarm($timeout);

        # handles every output line of the pipeline
        my $parser = sub {
            my $line = shift;

            print "$line\n";

            if ($line =~ m/^DEV:\sdev_id=(\d+)\ssize:\s(\d+)\sdevname:\s(\S+)$/) {
                my ($dev_id, $size, $devname) = ($1, $2, $3);
                $devinfo->{$devname} = { size => $size, dev_id => $dev_id };
            } elsif ($line =~ m/^CTIME: /) {
                # we correctly received the vma config, so we can disable
                # the timeout now for disk allocation (set to 10 minutes, so
                # that we always timeout if something goes wrong)
                alarm(600);
                $print_devmap->();
                print $fifofh "done\n";
                my $tmp = $oldtimeout || 0;
                $oldtimeout = undef;
                alarm($tmp);
                close($fifofh);
                $fifofh = undef;
            }
        };

        print "restore vma archive: $dbg_cmdstring\n";
        run_command($commands, input => $input, outfunc => $parser, afterfork => $openfifo);
    };
    my $err = $@;

    alarm($oldtimeout) if $oldtimeout;

    $restore_deactivate_volumes->($cfg, $virtdev_hash);

    close($fifofh) if $fifofh;
    unlink $mapfifo;
    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($cfg, $virtdev_hash);
        die $err;
    }

    my $new_conf = $restore_merge_config->($conffile, $new_conf_raw, $opts->{override_conf});
    PVE::QemuConfig->write_config($vmid, $new_conf);

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $opts->{pool}) if $opts->{pool};
}
7253
# Restore a VM from a (legacy) tar based vzdump archive.
#
# The archive is unpacked with 'tar --to-command=qmextract'; qmextract is
# configured through the VZDUMP_* environment variables and the command line
# options built below. Afterwards the volume mapping is read back from the
# 'qmrestore.stat' file in the temporary directory and the extracted config is
# rewritten line by line and stored as the config of $vmid.
#
# Parameters:
#   $archive - path of the archive, or '-' to read it from STDIN
#   $vmid    - ID of the VM to restore to
#   $user    - exported to qmextract as VZDUMP_USER
#   $opts    - hash of options; 'override_conf' must be empty, 'storage',
#              'pool', 'prealloc', 'info' and 'unique' are honoured
#
# Dies on any failure. The temporary directory is removed on success and on
# failure alike, so a failed restore does not leave extracted data behind.
sub restore_tar_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if (scalar(keys $opts->{override_conf}->%*) > 0) {
        my $keystring = join(' ', keys $opts->{override_conf}->%*);
        die "cannot pass along options ($keystring) when restoring from tar archive\n";
    }

    if ($archive ne '-') {
        my $firstfile = tar_archive_read_firstfile($archive);
        die "ERROR: file '$archive' does not look like a QemuServer vzdump backup\n"
            if $firstfile ne 'qemu-server.conf';
    }

    my $storecfg = PVE::Storage::config();

    # avoid zombie disks when restoring over an existing VM -> cleanup first
    # pass keep_empty_config=1 to keep the config (thus VMID) reserved for us
    # skiplock=1 because qmrestore has set the 'create' lock itself already
    my $vmcfgfn = PVE::QemuConfig->config_file($vmid);
    destroy_vm($storecfg, $vmid, 1, { lock => 'restore' }) if -f $vmcfgfn;

    my $tocmd = "/usr/lib/qemu-server/qmextract";

    $tocmd .= " --storage " . PVE::Tools::shellquote($opts->{storage}) if $opts->{storage};
    $tocmd .= " --pool " . PVE::Tools::shellquote($opts->{pool}) if $opts->{pool};
    $tocmd .= ' --prealloc' if $opts->{prealloc};
    $tocmd .= ' --info' if $opts->{info};

    # tar option "xf" does not autodetect compression when read from STDIN,
    # so we pipe to zcat
    my $cmd = "zcat -f|tar xf " . PVE::Tools::shellquote($archive) . " " .
        PVE::Tools::shellquote("--to-command=$tocmd");

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    mkpath $tmpdir;

    local $ENV{VZDUMP_TMPDIR} = $tmpdir;
    local $ENV{VZDUMP_VMID} = $vmid;
    local $ENV{VZDUMP_USER} = $user;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    my $new_conf_raw = '';

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        if ($archive eq '-') {
            print "extracting archive from STDIN\n";
            run_command($cmd, input => "<&STDIN");
        } else {
            print "extracting archive '$archive'\n";
            run_command($cmd);
        }

        # NOTE: this only leaves the eval block, not the sub
        return if $opts->{info};

        # read new mapping
        my $map = {};
        my $statfile = "$tmpdir/qmrestore.stat";
        if (my $fd = IO::File->new($statfile, "r")) {
            while (defined (my $line = <$fd>)) {
                if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                    $map->{$1} = $2 if $1;
                } else {
                    print STDERR "unable to parse line in statfile - $line\n";
                }
            }
            $fd->close();
        }

        my $confsrc = "$tmpdir/qemu-server.conf";

        my $srcfd = IO::File->new($confsrc, "r") || die "unable to open file '$confsrc'\n";

        my $cookie = { netcount => 0 };
        while (defined (my $line = <$srcfd>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $srcfd->close();
    };
    if (my $err = $@) {
        # the cleanup needs the stat file, so it has to run before the
        # temporary directory is removed
        tar_restore_cleanup($storecfg, "$tmpdir/qmrestore.stat") if !$opts->{info};
        rmtree $tmpdir;
        die $err;
    }

    rmtree $tmpdir;

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;
}
7366
# Call $func->($storage_id) once for every distinct storage that is referenced
# by a non-CD-ROM volume of $conf. The storage IDs are visited in sorted order.
sub foreach_storage_used_by_vm {
    my ($conf, $func) = @_;

    my %used_storage;

    my $collect = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive);

        # volumes whose ID yields no storage part are simply skipped
        my ($sid, $volname) = PVE::Storage::parse_volume_id($drive->{file}, 1);
        $used_storage{$sid} = $sid if $sid;
    };
    PVE::QemuConfig->foreach_volume($conf, $collect);

    for my $sid (sort keys %used_storage) {
        $func->($sid);
    }
}
7386
# storage types whose snapshots are taken by QEMU itself
my $qemu_snap_storage = {
    rbd => 1,
};

# Decide whether snapshots of volume $volid (attached as $deviceid) are done
# by QEMU. Returns 1 if so and an empty list/undef otherwise; dies when the
# storage of the volume is not part of $storecfg.
sub do_snapshots_with_qemu {
    my ($storecfg, $volid, $deviceid) = @_;

    # the TPM state is never snapshotted through QEMU
    return if $deviceid =~ m/tpmstate0/;

    # scalar context on purpose, only the storage ID is wanted here
    my $storage_name = PVE::Storage::parse_volume_id($volid);
    my $scfg = $storecfg->{ids}->{$storage_name};
    if (!defined($scfg)) {
        die "could not find storage '$storage_name'\n";
    }

    my $type_handled_by_qemu = $qemu_snap_storage->{$scfg->{type}} && !$scfg->{krbd};
    return 1 if $type_handled_by_qemu;

    return 1 if $volid =~ m/\.(qcow2|qed)$/;

    return;
}
7409
# Probe the guest agent of VM $vmid with a 'guest-ping' (3 second timeout).
# Returns 1 if the ping succeeded and 0 otherwise; a failure is reported with
# a warning unless $nowarn is set.
sub qga_check_running {
    my ($vmid, $nowarn) = @_;

    eval { mon_cmd($vmid, "guest-ping", timeout => 3); };
    my $ping_err = $@;

    return 1 if !$ping_err;

    warn "Qemu Guest Agent is not running - $ping_err" if !$nowarn;
    return 0;
}
7420
# Convert the volumes of VM $vmid into base volumes. If $disk is given only
# that drive is handled. CD-ROM drives and volumes without the 'template'
# storage feature are skipped. The config is written after every converted
# volume.
sub template_create {
    my ($vmid, $conf, $disk) = @_;

    my $storecfg = PVE::Storage::config();

    my $convert_volume = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);
        return if $disk && $ds ne $disk;

        my $volid = $drive->{file};
        return if !PVE::Storage::volume_has_feature($storecfg, 'template', $volid);

        # point the drive at the freshly created base volume and persist it
        my $base_volid = PVE::Storage::vdisk_create_base($storecfg, $volid);
        $drive->{file} = $base_volid;
        $conf->{$ds} = print_drive($drive);
        PVE::QemuConfig->write_config($vmid, $conf);
    };

    PVE::QemuConfig->foreach_volume($conf, $convert_volume);
}
7441
# Translate an 'iscsi://<portal>/<target>/<lun>' path into the option string
# form (file.driver=iscsi,...,driver=raw). Dies if $path does not have that
# shape.
sub convert_iscsi_path {
    my ($path) = @_;

    my ($portal, $target, $lun) = $path =~ m|^iscsi://([^/]+)/([^/]+)/(.+)$|
        or die "cannot convert iscsi path '$path', unkown format\n";

    my $initiator_name = get_initiator_name();

    return join(',',
        'file.driver=iscsi',
        'file.transport=tcp',
        "file.initiator-name=$initiator_name",
        "file.portal=$portal",
        "file.target=$target",
        "file.lun=$lun",
        'driver=raw',
    );
}
7458
# Copy a disk image with 'qemu-img convert' into an already existing target
# volume and print the progress.
#
# Parameters:
#   $src_volid           - source volume ID, or a path to a file/block device
#   $dst_volid           - destination volume ID (must be a valid volume ID)
#   $size                - size used to turn the reported percentage into bytes
#   $snapname            - optional snapshot of the source to read from
#   $is_zero_initialized - if true (and the target is not iSCSI) the target
#                          is opened through the 'zeroinit:' prefix
#
# Dies if source or destination cannot be resolved or if the copy fails.
sub qemu_img_convert {
    my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized) = @_;

    my $storecfg = PVE::Storage::config();
    my ($src_storeid, $src_volname) = PVE::Storage::parse_volume_id($src_volid, 1);
    my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid, 1);

    die "destination '$dst_volid' is not a valid volid form qemu-img convert\n" if !$dst_storeid;

    my ($cachemode, $src_path, $src_format);
    my $src_is_iscsi = 0;

    if ($src_storeid) {
        # source is a volume on a configured storage
        PVE::Storage::activate_volumes($storecfg, [$src_volid], $snapname);
        my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
        $src_format = qemu_img_format($src_scfg, $src_volname);
        $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
        $src_is_iscsi = ($src_path =~ m|^iscsi://|);
        $cachemode = 'none' if $src_scfg->{type} eq 'zfspool';
    } elsif (-f $src_volid || -b $src_volid) {
        # source is a plain file or block device, guess the format from the extension
        $src_path = $src_volid;
        if ($src_path =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/) {
            $src_format = $1;
        }
    }

    die "source '$src_volid' is not a valid volid nor path for qemu-img convert\n" if !$src_path;

    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
    my $dst_format = qemu_img_format($dst_scfg, $dst_volname);
    my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
    my $dst_is_iscsi = ($dst_path =~ m|^iscsi://|);

    my $cmd = ['/usr/bin/qemu-img', 'convert', '-p', '-n'];

    if ($snapname && $src_format && $src_format eq "qcow2") {
        push @$cmd, '-l', "snapshot.name=$snapname";
    }
    if ($dst_scfg->{type} eq 'zfspool') {
        push @$cmd, '-t', 'none';
    }
    if (defined($cachemode)) {
        push @$cmd, '-T', $cachemode;
    }

    if ($src_is_iscsi) {
        push @$cmd, '--image-opts';
        $src_path = convert_iscsi_path($src_path);
    } elsif ($src_format) {
        push @$cmd, '-f', $src_format;
    }

    if ($dst_is_iscsi) {
        push @$cmd, '--target-image-opts';
        $dst_path = convert_iscsi_path($dst_path);
    } else {
        push @$cmd, '-O', $dst_format;
    }

    my $target = (!$dst_is_iscsi && $is_zero_initialized) ? "zeroinit:$dst_path" : $dst_path;
    push @$cmd, $src_path, $target;

    # turn the '(NN.NN/100%)' progress lines into a human readable message
    my $report_progress = sub {
        my $line = shift;
        if ($line =~ m/\((\S+)\/100\%\)/) {
            my $percent = $1;
            my $done_bytes = int($size * $percent / 100);
            my $total_h = render_bytes($size, 1);
            my $transferred_h = render_bytes($done_bytes, 1);

            print "transferred $transferred_h of $total_h ($percent%)\n";
        }
    };

    eval { run_command($cmd, timeout => undef, outfunc => $report_progress); };
    my $err = $@;
    die "copy failed: $err" if $err;
}
7540
# Return the image format for $volname on the storage described by $scfg:
# the file extension if the storage has a 'path' and the extension is one of
# the known QEMU formats, "raw" in every other case.
sub qemu_img_format {
    my ($scfg, $volname) = @_;

    return "raw" if !$scfg->{path};

    return $1 if $volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/;

    return "raw";
}
7550
# Start a 'drive-mirror' block job for drive $drive of VM $vmid and hand over
# to qemu_drive_mirror_monitor().
#
# Parameters:
#   $vmid                - VM owning the source drive
#   $drive               - drive name without the 'drive-' prefix
#   $dst_volid           - target volume ID, or an 'nbd:' URI
#   $vmiddst             - VM ID the target belongs to (passed to the monitor)
#   $is_zero_initialized - use the 'zeroinit:' prefix for the target path
#   $jobs                - hash of already running jobs, the new job is added
#   $completion, $qga    - passed through to qemu_drive_mirror_monitor()
#   $bwlimit             - optional limit, multiplied by 1024 for QEMU
#   $src_bitmap          - optional dirty bitmap for an incremental sync
#
# If the job cannot be started, all jobs in $jobs are cancelled and the
# function dies with a "mirroring error".
sub qemu_drive_mirror {
    my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit, $src_bitmap) = @_;

    $jobs //= {} if !$jobs;
    $jobs = {} if !$jobs;

    my $device = "drive-$drive";
    $jobs->{$device} = {};

    my ($qemu_target, $format);
    if ($dst_volid =~ /^nbd:/) {
        ($qemu_target, $format) = ($dst_volid, "nbd");
    } else {
        my $storecfg = PVE::Storage::config();
        my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid);

        my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

        $format = qemu_img_format($dst_scfg, $dst_volname);

        my $dst_path = PVE::Storage::path($storecfg, $dst_volid);

        $qemu_target = $is_zero_initialized ? "zeroinit:$dst_path" : $dst_path;
    }

    my $opts = {
        timeout => 10,
        device => $device,
        mode => "existing",
        sync => "full",
        target => $qemu_target,
    };
    $opts->{format} = $format if $format;

    if (defined($src_bitmap)) {
        $opts->{sync} = 'incremental';
        $opts->{bitmap} = $src_bitmap;
        print "drive mirror re-using dirty bitmap '$src_bitmap'\n";
    }

    if (defined($bwlimit)) {
        $opts->{speed} = $bwlimit * 1024;
        print "drive mirror is starting for drive-$drive with bandwidth limit: ${bwlimit} KB/s\n";
    } else {
        print "drive mirror is starting for drive-$drive\n";
    }

    # if a job already runs for this device we get an error, catch it for cleanup
    eval { mon_cmd($vmid, "drive-mirror", %$opts); };
    if (my $err = $@) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@;
        die "mirroring error: $err\n";
    }

    qemu_drive_mirror_monitor ($vmid, $vmiddst, $jobs, $completion, $qga);
}
7602
# Poll the block jobs listed in $jobs (keyed by job/device ID) of VM $vmid
# once per second, print their progress and finish them according to
# $completion. Only jobs whose type equals $op (default "mirror") are
# considered.
#
# $completion can be either
# 'complete': wait until all jobs are ready, block-job-complete them (default)
# 'cancel': wait until all jobs are ready, block-job-cancel them
# 'skip': wait until all jobs are ready, return with block jobs in ready state
# 'auto': wait until all jobs disappear, only use for jobs which complete automatically
#
# If $vmiddst is set and differs from $vmid, the guest is frozen (with $qga
# and a running agent) or suspended while the jobs are cancelled instead.
# On any error the remaining jobs are cancelled and the function dies.
sub qemu_drive_mirror_monitor {
    my ($vmid, $vmiddst, $jobs, $completion, $qga, $op) = @_;

    $completion //= 'complete';
    $op //= "mirror";

    eval {
        # counts failed completion attempts, used as timeout
        my $err_complete = 0;

        my $starttime = time ();
        while (1) {
            die "block job ('$op') timed out\n" if $err_complete > 300;

            my $stats = mon_cmd($vmid, "query-block-jobs");
            my $ctime = time();

            # index the jobs of the wanted type by device
            my $running_jobs = {};
            for my $stat (@$stats) {
                $running_jobs->{$stat->{device}} = $stat if $stat->{type} eq $op;
            }

            my $readycounter = 0;

            for my $job_id (sort keys %$jobs) {
                my $job = $running_jobs->{$job_id};

                my $vanished = !defined($job);
                my $complete = defined($jobs->{$job_id}->{complete}) && $vanished;
                if ($complete || ($vanished && $completion eq 'auto')) {
                    print "$job_id: $op-job finished\n";
                    delete $jobs->{$job_id};
                    next;
                }

                # a job that vanished without us completing it was cancelled
                die "$job_id: '$op' has been cancelled\n" if !defined($job);

                my ($busy, $ready) = ($job->{busy}, $job->{ready});
                if (my $total = $job->{len}) {
                    my $transferred = $job->{offset} || 0;
                    my $remaining = $total - $transferred;
                    my $percent = sprintf "%.2f", ($transferred * 100 / $total);

                    my $duration = $ctime - $starttime;
                    my $total_h = render_bytes($total, 1);
                    my $transferred_h = render_bytes($transferred, 1);

                    my $status = sprintf(
                        "transferred $transferred_h of $total_h ($percent%%) in %s",
                        render_duration($duration),
                    );

                    if ($ready) {
                        # "still busy" shouldn't even happen? but mirror is weird
                        $status .= $busy ? ", still busy" : ", ready";
                    }
                    # only report until the job was seen ready once
                    print "$job_id: $status\n" if !$jobs->{$job_id}->{ready};
                    $jobs->{$job_id}->{ready} = $ready;
                }

                $readycounter++ if $job->{ready};
            }

            last if scalar(keys %$jobs) == 0;

            if ($readycounter == scalar(keys %$jobs)) {
                print "all '$op' jobs are ready\n";

                # do the complete later (or has already been done)
                last if $completion eq 'skip' || $completion eq 'auto';

                if ($vmiddst && $vmiddst != $vmid) {
                    my $agent_running = $qga && qga_check_running($vmid);
                    if ($agent_running) {
                        print "freeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-freeze"); };
                        warn $@ if $@;
                    } else {
                        print "suspend vm\n";
                        eval { PVE::QemuServer::vm_suspend($vmid, 1); };
                        warn $@ if $@;
                    }

                    # if we clone a disk for a new target vm, we don't switch the disk
                    PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs);

                    if ($agent_running) {
                        print "unfreeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-thaw"); };
                        warn $@ if $@;
                    } else {
                        print "resume vm\n";
                        eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
                        warn $@ if $@;
                    }

                    last;
                } else {

                    for my $job_id (sort keys %$jobs) {
                        # try to switch the disk if source and destination are on the same guest
                        print "$job_id: Completing block job_id...\n";

                        my $qmp_cmd;
                        if ($completion eq 'complete') {
                            $qmp_cmd = 'block-job-complete';
                        } elsif ($completion eq 'cancel') {
                            $qmp_cmd = 'block-job-cancel';
                        } else {
                            die "invalid completion value: $completion\n";
                        }
                        eval { mon_cmd($vmid, $qmp_cmd, device => $job_id) };
                        if ($@ =~ m/cannot be completed/) {
                            print "$job_id: block job cannot be completed, trying again.\n";
                            $err_complete++;
                        } else {
                            print "$job_id: Completed successfully.\n";
                            $jobs->{$job_id}->{complete} = 1;
                        }
                    }
                }
            }
            sleep 1;
        }
    };
    my $err = $@;

    if ($err) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        die "block job ($op) error: $err";
    }
}
7744
# Send 'block-job-cancel' for every job in $jobs and then poll once per second
# until none of them is reported by 'query-block-jobs' anymore. Finished jobs
# are removed from $jobs, so the hash is empty when the function returns.
sub qemu_blockjobs_cancel {
    my ($vmid, $jobs) = @_;

    for my $job_id (keys %$jobs) {
        print "$job_id: Cancelling block job\n";
        # errors of the cancel request itself are ignored on purpose
        eval { mon_cmd($vmid, "block-job-cancel", device => $job_id); };
        $jobs->{$job_id}->{cancel} = 1;
    }

    while (1) {
        my $stats = mon_cmd($vmid, "query-block-jobs");

        my $still_running = {};
        for my $stat (@$stats) {
            $still_running->{$stat->{device}} = $stat;
        }

        for my $job_id (keys %$jobs) {
            next if !defined($jobs->{$job_id}->{cancel});
            next if defined($still_running->{$job_id});

            print "$job_id: Done.\n";
            delete $jobs->{$job_id};
        }

        last if scalar(keys %$jobs) == 0;

        sleep 1;
    }
}
7775
# Clone one drive, either as linked clone or as full copy.
#
# Parameters:
#   $storecfg   - storage configuration
#   $source     - hash with 'vmid', 'running', 'drivename', 'drive', 'snapname'
#   $dest       - hash with 'vmid', 'drivename', 'efisize', 'storage', 'format'
#   $full       - true for a full copy, false for a linked clone
#   $newvollist - array ref, every newly created volume ID is pushed onto it
#   $jobs, $completion, $qga, $bwlimit - passed on to the drive-mirror helpers
#
# A full clone of a running VM's drive (without snapshot) uses drive-mirror,
# an EFI disk is copied with 'qemu-img dd', a cloud-init drive is only
# allocated, everything else goes through qemu_img_convert().
#
# Returns a copy of the drive hash pointing to the new volume.
sub clone_disk {
    my ($storecfg, $source, $dest, $full, $newvollist, $jobs, $completion, $qga, $bwlimit) = @_;

    my ($vmid, $running) = $source->@{qw(vmid running)};
    my ($src_drivename, $drive, $snapname) = $source->@{qw(drivename drive snapname)};

    my ($newvmid, $dst_drivename, $efisize) = $dest->@{qw(vmid drivename efisize)};
    my ($storage, $format) = $dest->@{qw(storage format)};

    my $use_drive_mirror = $full && $running && $src_drivename && !$snapname;

    if ($src_drivename && $dst_drivename && $src_drivename ne $dst_drivename) {
        die "cloning from/to EFI disk requires EFI disk\n"
            if $src_drivename eq 'efidisk0' || $dst_drivename eq 'efidisk0';
        die "cloning from/to TPM state requires TPM state\n"
            if $src_drivename eq 'tpmstate0' || $dst_drivename eq 'tpmstate0';

        # This would lead to two device nodes in QEMU pointing to the same backing image!
        die "cannot change drive name when cloning disk from/to the same VM\n"
            if $use_drive_mirror && $vmid == $newvmid;
    }

    die "cannot move TPM state while VM is running\n"
        if $use_drive_mirror && $src_drivename eq 'tpmstate0';

    my $newvolid;

    print "create " . ($full ? 'full' : 'linked') . " clone of drive ";
    print "$src_drivename " if $src_drivename;
    print "($drive->{file})\n";

    if ($full) {
        my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        $storeid = $storage if $storage;

        my $dst_format = resolve_dst_disk_format($storecfg, $storeid, $volname, $format);

        # name and size of the volume to allocate depend on the kind of drive
        my $name = undef;
        my $size = undef;
        if (drive_is_cloudinit($drive)) {
            $name = "vm-$newvmid-cloudinit";
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            if ($scfg->{path}) {
                $name .= ".$dst_format";
            }
            $snapname = undef;
            $size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE;
        } elsif ($dst_drivename eq 'efidisk0') {
            $size = $efisize or die "internal error - need to specify EFI disk size\n";
        } elsif ($dst_drivename eq 'tpmstate0') {
            $dst_format = 'raw';
            $size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
        } else {
            ($size) = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
        }
        $newvolid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $newvmid, $dst_format, $name, ($size/1024)
        );
        push @$newvollist, $newvolid;

        PVE::Storage::activate_volumes($storecfg, [$newvolid]);

        if (drive_is_cloudinit($drive)) {
            # no data is copied for a cloud-init drive, but:
            # when cloning multiple disks (e.g. during clone_vm) it might be the last disk
            # if this is the case, we have to complete any block-jobs still there from
            # previous drive-mirrors
            if (($completion eq 'complete') && (scalar(keys %$jobs) > 0)) {
                qemu_drive_mirror_monitor($vmid, $newvmid, $jobs, $completion, $qga);
            }
        } else {
            my $sparseinit = PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $newvolid);
            if ($use_drive_mirror) {
                qemu_drive_mirror($vmid, $src_drivename, $newvolid, $newvmid, $sparseinit, $jobs,
                    $completion, $qga, $bwlimit);
            } elsif ($dst_drivename eq 'efidisk0') {
                # TODO: handle bwlimits
                # the relevant data on the efidisk may be smaller than the source
                # e.g. on RBD/ZFS, so we use dd to copy only the amount
                # that is given by the OVMF_VARS.fd
                my $src_path = PVE::Storage::path($storecfg, $drive->{file}, $snapname);
                my $dst_path = PVE::Storage::path($storecfg, $newvolid);

                my $src_format = (PVE::Storage::parse_volname($storecfg, $drive->{file}))[6];

                # better for Ceph if block size is not too small, see bug #3324
                my $bs = 1024*1024;

                my $cmd = ['qemu-img', 'dd', '-n', '-O', $dst_format];

                if ($src_format eq 'qcow2' && $snapname) {
                    die "cannot clone qcow2 EFI disk snapshot - requires QEMU >= 6.2\n"
                        if !min_version(kvm_user_version(), 6, 2);
                    push $cmd->@*, '-l', $snapname;
                }
                push $cmd->@*, "bs=$bs", "osize=$size", "if=$src_path", "of=$dst_path";
                run_command($cmd);
            } else {
                # TODO: handle bwlimits
                qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit);
            }
        }
    } else {
        $newvolid = PVE::Storage::vdisk_clone($storecfg, $drive->{file}, $newvmid, $snapname);
        push @$newvollist, $newvolid;
    }

    # failing to query the size is not fatal, the size is simply left out then
    my ($size) = eval { PVE::Storage::volume_size_info($storecfg, $newvolid, 10) };

    my $disk = dclone($drive);
    delete $disk->{format};
    $disk->{file} = $newvolid;
    $disk->{size} = $size if defined($size);

    return $disk;
}
7895
# Ask the running QEMU process of VM $vmid for its version ('query-version')
# and return it as "<major>.<minor>".
sub get_running_qemu_version {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, "query-version");

    return join('.', $res->{qemu}->{major}, $res->{qemu}->{minor});
}
7901
# Decide whether the old (non-EFI) PXE bios files have to be used for
# $machine_type. Returns the list ($use_old_bios_files, $machine_type), where
# a '.pxe' suffix has been stripped from the machine type; returns nothing if
# no machine type is given.
sub qemu_use_old_bios_files {
    my ($machine_type) = @_;

    return if !$machine_type;

    # an explicit '.pxe' suffix always selects the old files
    if ($machine_type =~ m/^(\S+)\.pxe$/) {
        my $plain_type = $1;
        return (1, $plain_type);
    }

    my $version = extract_version($machine_type, kvm_user_version());
    # Note: kvm version < 2.4 use non-efi pxe files, and have problems when we
    # load new efi bios files on migration. So this hack is required to allow
    # live migration from qemu-2.2 to qemu-2.4, which is sometimes used when
    # upgrading from proxmox-ve-3.X to proxmox-ve 4.0
    my $use_old_bios_files = !min_version($version, 2, 4);

    return ($use_old_bios_files, $machine_type);
}
7923
# Return the size in bytes of the OVMF vars image that matches the VM config
# $conf. $efidisk may be passed as already parsed drive; otherwise the
# 'efidisk0' entry of $conf is parsed if there is one. Dies if the image file
# does not exist.
sub get_efivars_size {
    my ($conf, $efidisk) = @_;

    my $arch = get_vm_arch($conf);

    if (!defined($efidisk)) {
        $efidisk = $conf->{efidisk0} ? parse_drive('efidisk0', $conf->{efidisk0}) : undef;
    }

    my $smm = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my (undef, $ovmf_vars) = get_ovmf_files($arch, $efidisk, $smm);

    if (! -f $ovmf_vars) {
        die "uefi vars image '$ovmf_vars' not found\n";
    }

    return -s $ovmf_vars;
}
7934
# Refresh the 'size' property of the 'efidisk0' entry in $conf (in place) with the
# value returned by get_efivars_size(). Does nothing when no 'efidisk0' is defined.
# Always returns nothing.
sub update_efidisk_size {
    my ($conf) = @_;

    if (defined(my $efidisk_prop = $conf->{efidisk0})) {
        my $drive = PVE::QemuServer::parse_drive('efidisk0', $efidisk_prop);
        $drive->{size} = get_efivars_size($conf);
        $conf->{efidisk0} = print_drive($drive);
    }

    return;
}
7946
# Set the 'size' property of the 'tpmstate0' entry in $conf (in place) to
# PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE.
#
# Does nothing (and returns nothing) when the config has no 'tpmstate0' entry,
# mirroring update_efidisk_size(); previously an undefined entry was passed on to
# the drive parser and a drive consisting only of a size was printed back.
#
# Returns the updated 'tpmstate0' property string, as the previous implementation
# implicitly did.
sub update_tpmstate_size {
    my ($conf) = @_;

    return if !defined($conf->{tpmstate0});

    my $disk = PVE::QemuServer::parse_drive('tpmstate0', $conf->{tpmstate0});
    $disk->{size} = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE();
    $conf->{tpmstate0} = print_drive($disk);

    return $conf->{tpmstate0};
}
7954
# Allocate a new volume on storage $storeid for VM $vmid and fill it with the
# contents of the OVMF vars image selected by get_ovmf_files().
#
# Dies if the vars image is not a regular file.
# Returns ($volid, $size), where $size is the first value reported by
# PVE::Storage::volume_size_info() for the new volume, divided by 1024.
sub create_efidisk($$$$$$$) {
    my ($storecfg, $storeid, $vmid, $fmt, $arch, $efidisk, $smm) = @_;

    # second element of the returned list is the vars image path
    my $vars_image = (get_ovmf_files($arch, $efidisk, $smm))[1];
    -f $vars_image or die "EFI vars default image not found\n";

    my $image_bytes = -s $vars_image;
    my $image_kb = PVE::Tools::convert_size($image_bytes, 'b' => 'kb');

    my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $fmt, undef, $image_kb);
    PVE::Storage::activate_volumes($storecfg, [$volid]);

    qemu_img_convert($vars_image, $volid, $image_bytes, undef, 0);

    my ($volume_size) = PVE::Storage::volume_size_info($storecfg, $volid, 3);

    return ($volid, $volume_size / 1024);
}
7971
# Query the iothreads of VM $vmid ('query-iothreads') and return a hash reference
# mapping each iothread's 'id' to its 'thread-id'.
sub vm_iothreads_list {
    my ($vmid) = @_;

    my $entries = mon_cmd($vmid, 'query-iothreads');

    my %thread_id_of;
    for my $entry (@$entries) {
        my $id = $entry->{id};
        $thread_id_of{$id} = $entry->{'thread-id'};
    }

    return \%thread_id_of;
}
7984
# Compute SCSI controller placement for a drive.
#
# The number of devices per controller depends on $conf->{scsihw}:
#   unset or matching /^lsi/  -> 7
#   'virtio-scsi-single'      -> 1
#   anything else             -> 256
#
# Returns ($maxdev, $controller, $controller_prefix), where $controller is the
# integer part of $drive->{index} / $maxdev and $controller_prefix is
# "virtioscsi" for 'virtio-scsi-single' and "scsihw" otherwise.
sub scsihw_infos {
    my ($conf, $drive) = @_;

    my $scsihw = $conf->{scsihw};
    my $is_single = $scsihw && $scsihw eq 'virtio-scsi-single';

    my $maxdev =
        (!$scsihw || $scsihw =~ m/^lsi/) ? 7
        : $is_single                     ? 1
        :                                  256;

    my $controller = int($drive->{index} / $maxdev);
    my $controller_prefix = $is_single ? "virtioscsi" : "scsihw";

    return ($maxdev, $controller, $controller_prefix);
}
8005
# Pick the image format to use for a new disk on storage $storeid.
#
# If $format is not given, the format of $src_volname (as determined by
# qemu_img_format()) is used as a hint; without a source volume the storage's
# default format is returned right away. A format that is not in the storage's
# list of valid formats is replaced by the storage default.
sub resolve_dst_disk_format {
    my ($storecfg, $storeid, $src_volname, $format) = @_;

    my ($default_format, $valid_formats) =
        PVE::Storage::storage_default_format($storecfg, $storeid);

    if (!$format) {
        return $default_format if !$src_volname;

        # no target format specified, use the source disk format as hint
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        $format = qemu_img_format($scfg, $src_volname);
    }

    # fall back to the default if the requested format is not supported
    my $is_supported = grep { $_ eq $format } @$valid_formats;

    return $is_supported ? $format : $default_format;
}
8025
# NOTE: if this logic changes, please update docs & possibly gui logic
#
# Select the storage used for VM state. Order of preference:
#   1. $conf->{vmstatestorage}, if set
#   2. a shared storage on which the VM has at least one disk
#   3. a non-shared storage on which the VM has at least one disk
#   4. the storage named 'local'
# Within 2. and 3., storages with a 'path' property (file based) are preferred.
sub find_vmstate_storage {
    my ($conf, $storecfg) = @_;

    if (my $configured = $conf->{vmstatestorage}) {
        return $configured;
    }

    my %candidate;    # best storage id found so far, keyed by 'shared' / 'local'

    foreach_storage_used_by_vm($conf, sub {
        my ($sid) = @_;
        my $scfg = PVE::Storage::storage_config($storecfg, $sid);
        my $kind = $scfg->{shared} ? 'shared' : 'local';

        # take the first one seen, but let file based storage override it
        if (!$candidate{$kind} || $scfg->{path}) {
            $candidate{$kind} = $sid;
        }
    });

    return $candidate{shared} // $candidate{local} // 'local';
}
8049
# Create a new UUID with the UUID module and return its string form
# (the output of UUID::unparse).
sub generate_uuid {
    my $raw_uuid;
    UUID::generate($raw_uuid);

    my $uuid_text;
    UUID::unparse($raw_uuid, $uuid_text);

    return $uuid_text;
}
8056
# Return a freshly generated UUID as a "uuid=<uuid>" string.
sub generate_smbios1_uuid {
    my $uuid = generate_uuid();
    return "uuid=$uuid";
}
8060
# Send the 'nbd-server-stop' monitor command to VM $vmid and pass on its result.
sub nbd_stop {
    my $vmid = shift;

    return mon_cmd($vmid, 'nbd-server-stop');
}
8066
# Create (or truncate) the reboot trigger file "/run/qemu-server/$vmid.reboot".
# Dies if the file cannot be opened for writing; returns the result of close().
sub create_reboot_request {
    my ($vmid) = @_;

    my $trigger_file = "/run/qemu-server/$vmid.reboot";

    open(my $fh, '>', $trigger_file)
        or die "failed to create reboot trigger file: $!\n";

    return close($fh);
}
8073
# Remove the reboot trigger file "/run/qemu-server/$vmid.reboot".
#
# Returns the result of unlink(): 1 if the file was removed, 0 if it did not
# exist. Dies if removal failed for any reason other than ENOENT.
sub clear_reboot_request {
    my ($vmid) = @_;

    my $removed = unlink("/run/qemu-server/$vmid.reboot");

    # a missing file simply means there was no pending request
    if (!$removed && $! != POSIX::ENOENT) {
        die "could not remove reboot request for $vmid: $!";
    }

    return $removed;
}
8085
# Translate a legacy boot specification (a string of single-letter device classes)
# into a hash reference mapping device names to boot indices.
#
# The legacy string is taken from $bootcfg->{legacy}, falling back to the default
# of the 'legacy' property in $boot_fmt. The class at position N (1-based) starts
# at index N*100, and each device handled for a class advances that class' index
# by one:
#   d - CD-ROM drives (per drive_is_cdrom($drive, 1))
#   c - other volumes; only the one named by $conf->{bootdisk} gets an entry
#   n - configured netX devices
sub bootorder_from_legacy {
    my ($conf, $bootcfg) = @_;

    my $legacy = $bootcfg->{legacy} || $boot_fmt->{legacy}->{default};

    my %next_index;
    my $position = 0;
    for my $class (split(//, $legacy)) {
        $position++;
        $next_index{$class} = $position * 100;
    }

    my %bootorder;

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        if (drive_is_cdrom($drive, 1)) {
            if (my $cdrom_index = $next_index{d}) {
                $bootorder{$ds} = $cdrom_index;
                $next_index{d} = $cdrom_index + 1;
            }
        } elsif (my $disk_index = $next_index{c}) {
            my $is_bootdisk = $conf->{bootdisk} && $conf->{bootdisk} eq $ds;
            $bootorder{$ds} = $disk_index if $is_bootdisk;
            # the index advances for every non-CD-ROM volume, not only the bootdisk
            $next_index{c} = $disk_index + 1;
        }
    });

    if ($next_index{n}) {
        for my $id (0 .. $MAX_NETS - 1) {
            my $netname = "net$id";
            next if !$conf->{$netname};
            $bootorder{$netname} = $next_index{n}++;
        }
    }

    return \%bootorder;
}
8125
# Generate default device list for 'boot: order=' property. Matches legacy
# default boot order, but with explicit device names. This is important, since
# the fallback for when neither 'order' nor the old format is specified relies
# on 'bootorder_from_legacy' above, and it would be confusing if this diverges.
#
# Returns an array reference with, in this order and as far as present: the first
# hard disk, the first CD-ROM and the lowest-numbered configured network device.
sub get_default_bootdevices {
    my ($conf) = @_;

    my @devices;

    # harddisk (0) first, then cdrom (1)
    for my $cdrom (0, 1) {
        my $first = PVE::QemuServer::Drive::resolve_first_disk($conf, $cdrom);
        push @devices, $first if $first;
    }

    # network: only the first configured device
    for my $id (0 .. $MAX_NETS - 1) {
        my $netname = "net$id";
        if ($conf->{$netname}) {
            push @devices, $netname;
            last;
        }
    }

    return \@devices;
}
8153
# Return a hash reference mapping device names to boot indices for $conf.
#
# Without a 'boot' property, or with one that uses the legacy format, the result
# comes from bootorder_from_legacy(). With 'order=', the listed devices are
# numbered consecutively starting at 100. Otherwise the result is empty.
sub device_bootorder {
    my ($conf) = @_;

    if (!defined($conf->{boot})) {
        return bootorder_from_legacy($conf);
    }

    my $boot = parse_property_string($boot_fmt, $conf->{boot});

    if (!defined($boot) || $boot->{legacy}) {
        return bootorder_from_legacy($conf, $boot);
    }

    my $bootorder = {};

    if ($boot->{order}) {
        # start at 100 to allow user to insert devices before us with -args
        my $bootindex = 100;
        for my $dev (PVE::Tools::split_list($boot->{order})) {
            $bootorder->{$dev} = $bootindex;
            $bootindex++;
        }
    }

    return $bootorder;
}
8173
# Connect to the qmeventd socket and send the vzdump handshake for VM $vmid.
#
# The connection is attempted up to five times, with a 25ms pause in between, as
# long as the failure is EINTR or EAGAIN; any other error, or a fifth failed
# attempt, is fatal.
#
# Returns the socket handle, to be closed later when inhibit is no longer required.
sub register_qmeventd_handle {
    my ($vmid) = @_;

    my $peer = "/var/run/qmeventd.sock";
    my $attempts = 0;
    my $sock;

    until ($sock = IO::Socket::UNIX->new(Peer => $peer, Blocking => 0, Timeout => 1)) {
        $attempts++;

        die "unable to connect to qmeventd socket (vmid: $vmid) - $!\n"
            if $! != EINTR && $! != EAGAIN;

        die "unable to connect to qmeventd socket (vmid: $vmid) - timeout "
            . "after $attempts retries\n"
            if $attempts > 4;

        usleep(25000);
    }

    # send handshake to mark VM as backing up
    print {$sock} to_json({ vzdump => { vmid => "$vmid" } });

    return $sock;
}
8201
8202 # bash completion helper
8203
# Completion helper: return an array reference with the volume IDs of backup
# archives whose format matches "vma.<compressor>".
#
# If $cvalue starts with "<storeid>:", only that storage is passed to
# PVE::Storage::template_list(); otherwise the storage ID is left undefined.
sub complete_backup_archives {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    # stays undef when the value typed so far has no "<storeid>:" prefix
    my ($storeid) = $cvalue =~ m/^([^:]+):/;

    my $data = PVE::Storage::template_list($cfg, $storeid, 'backup');

    my @volids;
    for my $items (values %$data) {
        for my $item (@$items) {
            my $is_vma = $item->{format} =~ m/^vma\.(${\PVE::Storage::Plugin::COMPRESSOR_RE})$/;
            next if !$is_vma;
            next if !defined($item->{volid});
            push @volids, $item->{volid};
        }
    }

    return \@volids;
}
8227
# Completion helper: return an array reference of VM IDs known to vmstatus().
#
# $running undefined - all VMs
# $running true      - only non-template VMs whose status is 'running'
# $running false     - only non-template VMs whose status is not 'running'
my $complete_vmid_full = sub {
    my ($running) = @_;

    my $status_of = vmstatus();

    my @vmids;
    for my $vmid (keys %$status_of) {
        if (defined($running)) {
            my $info = $status_of->{$vmid};
            next if $info->{template};

            my $is_running = $info->{status} eq 'running';
            next if $running ? !$is_running : $is_running;
        }
        push @vmids, $vmid;
    }

    return \@vmids;
};
8247
# Completion helper: all VM IDs, regardless of state.
sub complete_vmid {
    return $complete_vmid_full->();
}
8251
# Completion helper: IDs of non-template VMs that are not running.
sub complete_vmid_stopped {
    return $complete_vmid_full->(0);
}
8255
# Completion helper: IDs of non-template VMs that are running.
sub complete_vmid_running {
    return $complete_vmid_full->(1);
}
8259
# Completion helper: return an array reference with the IDs of all storages that
# pass PVE::Storage::storage_check_enabled() (no node given) and have the 'images'
# content type configured.
sub complete_storage {
    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @storages = grep {
        PVE::Storage::storage_check_enabled($cfg, $_, undef, 1)
            && $ids->{$_}->{content}->{images}
    } keys %$ids;

    return \@storages;
}
8274
# Completion helper: return an array reference with the IDs of all storages that
# pass PVE::Storage::storage_check_enabled() for the target node and have the
# 'images' content type configured.
#
# The target node is taken from the second element of $all_args.
sub complete_migration_storage {
    my ($cmd, $param, $current_value, $all_args) = @_;

    my (undef, $targetnode) = @$all_args;

    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @storages;
    for my $sid (keys %$ids) {
        my $enabled = PVE::Storage::storage_check_enabled($cfg, $sid, $targetnode, 1);
        if ($enabled && $ids->{$sid}->{content}->{images}) {
            push @storages, $sid;
        }
    }

    return \@storages;
}
8292
# Return true if the 'query-status' monitor command reports VM $vmid as "paused".
#
# Errors from the config-exists check or from the monitor command are not fatal:
# they are emitted as a warning and a false value is returned.
sub vm_is_paused {
    my ($vmid) = @_;

    my $status = eval {
        PVE::QemuConfig::assert_config_exists_on_node($vmid);
        mon_cmd($vmid, "query-status");
    };
    if (my $err = $@) {
        warn "$err\n";
    }

    return $status && $status->{status} eq "paused";
}
8302
# Verify that the storage holding volume $vol has the volume's content type
# (first value returned by PVE::Storage::parse_volname()) enabled.
#
# Returns 1 on success, dies otherwise.
sub check_volume_storage_type {
    my ($storecfg, $vol) = @_;

    my ($storeid) = PVE::Storage::parse_volume_id($vol);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
    my ($vtype) = PVE::Storage::parse_volname($storecfg, $vol);

    if (!$scfg->{content}->{$vtype}) {
        die "storage '$storeid' does not support content-type '$vtype'\n";
    }

    return 1;
}
8315
# For every netX entry in $conf that parses and has a MAC address, call
# add_bridge_fdb() for the interface "tap<vmid>i<X>" - the SDN variant when
# $have_sdn is set, the plain PVE::Network one otherwise.
sub add_nets_bridge_fdb {
    my ($conf, $vmid) = @_;

    for my $opt (keys %$conf) {
        # only netX options are of interest; keep the index in a named variable
        my ($net_index) = $opt =~ m/^net(\d+)$/ or next;

        my $net = parse_net($conf->{$opt});
        next if !$net || !$net->{macaddr};

        my $iface = "tap${vmid}i${net_index}";
        my $mac = $net->{macaddr};

        if ($have_sdn) {
            PVE::Network::SDN::Zones::add_bridge_fdb($iface, $mac, $net->{bridge}, $net->{firewall});
        } else {
            PVE::Network::add_bridge_fdb($iface, $mac, $net->{firewall});
        }
    }
}
8334 1;