]> git.proxmox.com Git - qemu-server.git/blame_incremental - PVE/QemuServer.pm
schema: use pve-bridge-id
[qemu-server.git] / PVE / QemuServer.pm
... / ...
CommitLineData
1package PVE::QemuServer;
2
3use strict;
4use warnings;
5
6use Cwd 'abs_path';
7use Digest::SHA;
8use Fcntl ':flock';
9use Fcntl;
10use File::Basename;
11use File::Copy qw(copy);
12use File::Path;
13use File::stat;
14use Getopt::Long;
15use IO::Dir;
16use IO::File;
17use IO::Handle;
18use IO::Select;
19use IO::Socket::UNIX;
20use IPC::Open3;
21use JSON;
22use MIME::Base64;
23use POSIX;
24use Storable qw(dclone);
25use Time::HiRes qw(gettimeofday usleep);
26use URI::Escape;
27use UUID;
28
29use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file);
30use PVE::CGroup;
31use PVE::DataCenterConfig;
32use PVE::Exception qw(raise raise_param_exc);
33use PVE::Format qw(render_duration render_bytes);
34use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
35use PVE::INotify;
36use PVE::JSONSchema qw(get_standard_option parse_property_string);
37use PVE::ProcFSTools;
38use PVE::PBSClient;
39use PVE::RPCEnvironment;
40use PVE::Storage;
41use PVE::SysFSTools;
42use PVE::Systemd;
43use PVE::Tools qw(run_command file_read_firstline file_get_contents dir_glob_foreach get_host_arch $IPV6RE);
44
45use PVE::QMPClient;
46use PVE::QemuConfig;
47use PVE::QemuServer::Helpers qw(min_version config_aware_timeout);
48use PVE::QemuServer::Cloudinit;
49use PVE::QemuServer::CGroup;
50use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options);
51use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom drive_is_read_only parse_drive print_drive);
52use PVE::QemuServer::Machine;
53use PVE::QemuServer::Memory;
54use PVE::QemuServer::Monitor qw(mon_cmd);
55use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
56use PVE::QemuServer::USB qw(parse_usb_device);
57
# Optional SDN support: remember whether the SDN zones module could be loaded.
# A failing `require` is caught by the eval, which then yields undef, so the
# flag simply stays undefined when the module is not available.
my $have_sdn = eval {
    require PVE::Network::SDN::Zones;
    1;
};
63
64my $EDK2_FW_BASE = '/usr/share/pve-edk2-firmware/';
65my $OVMF = {
66 x86_64 => {
67 '4m-no-smm' => [
68 "$EDK2_FW_BASE/OVMF_CODE_4M.fd",
69 "$EDK2_FW_BASE/OVMF_VARS_4M.fd",
70 ],
71 '4m-no-smm-ms' => [
72 "$EDK2_FW_BASE/OVMF_CODE_4M.fd",
73 "$EDK2_FW_BASE/OVMF_VARS_4M.ms.fd",
74 ],
75 '4m' => [
76 "$EDK2_FW_BASE/OVMF_CODE_4M.secboot.fd",
77 "$EDK2_FW_BASE/OVMF_VARS_4M.fd",
78 ],
79 '4m-ms' => [
80 "$EDK2_FW_BASE/OVMF_CODE_4M.secboot.fd",
81 "$EDK2_FW_BASE/OVMF_VARS_4M.ms.fd",
82 ],
83 default => [
84 "$EDK2_FW_BASE/OVMF_CODE.fd",
85 "$EDK2_FW_BASE/OVMF_VARS.fd",
86 ],
87 },
88 aarch64 => {
89 default => [
90 "$EDK2_FW_BASE/AAVMF_CODE.fd",
91 "$EDK2_FW_BASE/AAVMF_VARS.fd",
92 ],
93 },
94};
95
96my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
97
# Note about locking: we use flock on the config file to protect against concurrent actions.
# Additionally, we have a 'lock' setting in the config file. This can be set to 'migrate',
# 'backup', 'snapshot' or 'rollback'. Most actions are not allowed when such a lock is set.
# But you can ignore this kind of lock with the --skiplock flag.
102
103cfs_register_file('/qemu-server/',
104 \&parse_vm_config,
105 \&write_vm_config);
106
107PVE::JSONSchema::register_standard_option('pve-qm-stateuri', {
108 description => "Some command save/restore state from this location.",
109 type => 'string',
110 maxLength => 128,
111 optional => 1,
112});
113
114PVE::JSONSchema::register_standard_option('pve-qemu-machine', {
115 description => "Specifies the Qemu machine type.",
116 type => 'string',
117 pattern => '(pc|pc(-i440fx)?-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|q35|pc-q35-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|virt(?:-\d+(\.\d+)+)?(\+pve\d+)?)',
118 maxLength => 40,
119 optional => 1,
120});
121
122PVE::JSONSchema::register_standard_option('pve-targetstorage', {
123 description => "Mapping from source to target storages. Providing only a single storage ID maps all source storages to that storage. Providing the special value '1' will map each source storage to itself.",
124 type => 'string',
125 format => 'storage-pair-list',
126 optional => 1,
127});
128
129#no warnings 'redefine';
130
# Holds the node name once it has been looked up; undef until the first call.
my $nodename_cache;

# Return the node name as reported by PVE::INotify::nodename().
# The lookup is performed only while no defined value has been cached yet;
# afterwards the cached value is returned directly.
sub nodename {
    if (!defined($nodename_cache)) {
        $nodename_cache = PVE::INotify::nodename();
    }
    return $nodename_cache;
}
136
137my $watchdog_fmt = {
138 model => {
139 default_key => 1,
140 type => 'string',
141 enum => [qw(i6300esb ib700)],
142 description => "Watchdog type to emulate.",
143 default => 'i6300esb',
144 optional => 1,
145 },
146 action => {
147 type => 'string',
148 enum => [qw(reset shutdown poweroff pause debug none)],
149 description => "The action to perform if after activation the guest fails to poll the watchdog in time.",
150 optional => 1,
151 },
152};
153PVE::JSONSchema::register_format('pve-qm-watchdog', $watchdog_fmt);
154
155my $agent_fmt = {
156 enabled => {
157 description => "Enable/disable communication with a Qemu Guest Agent (QGA) running in the VM.",
158 type => 'boolean',
159 default => 0,
160 default_key => 1,
161 },
162 fstrim_cloned_disks => {
163 description => "Run fstrim after moving a disk or migrating the VM.",
164 type => 'boolean',
165 optional => 1,
166 default => 0
167 },
168 type => {
169 description => "Select the agent type",
170 type => 'string',
171 default => 'virtio',
172 optional => 1,
173 enum => [qw(virtio isa)],
174 },
175};
176
177my $vga_fmt = {
178 type => {
179 description => "Select the VGA type.",
180 type => 'string',
181 default => 'std',
182 optional => 1,
183 default_key => 1,
184 enum => [qw(cirrus qxl qxl2 qxl3 qxl4 none serial0 serial1 serial2 serial3 std virtio vmware)],
185 },
186 memory => {
187 description => "Sets the VGA memory (in MiB). Has no effect with serial display.",
188 type => 'integer',
189 optional => 1,
190 minimum => 4,
191 maximum => 512,
192 },
193};
194
195my $ivshmem_fmt = {
196 size => {
197 type => 'integer',
198 minimum => 1,
199 description => "The size of the file in MB.",
200 },
201 name => {
202 type => 'string',
203 pattern => '[a-zA-Z0-9\-]+',
204 optional => 1,
205 format_description => 'string',
206 description => "The name of the file. Will be prefixed with 'pve-shm-'. Default is the VMID. Will be deleted when the VM is stopped.",
207 },
208};
209
210my $audio_fmt = {
211 device => {
212 type => 'string',
213 enum => [qw(ich9-intel-hda intel-hda AC97)],
214 description => "Configure an audio device."
215 },
216 driver => {
217 type => 'string',
218 enum => ['spice', 'none'],
219 default => 'spice',
220 optional => 1,
221 description => "Driver backend for the audio device."
222 },
223};
224
225my $spice_enhancements_fmt = {
226 foldersharing => {
227 type => 'boolean',
228 optional => 1,
229 default => '0',
230 description => "Enable folder sharing via SPICE. Needs Spice-WebDAV daemon installed in the VM."
231 },
232 videostreaming => {
233 type => 'string',
234 enum => ['off', 'all', 'filter'],
235 default => 'off',
236 optional => 1,
237 description => "Enable video streaming. Uses compression for detected video streams."
238 },
239};
240
241my $rng_fmt = {
242 source => {
243 type => 'string',
244 enum => ['/dev/urandom', '/dev/random', '/dev/hwrng'],
245 default_key => 1,
246 description => "The file on the host to gather entropy from. In most cases '/dev/urandom'"
247 ." should be preferred over '/dev/random' to avoid entropy-starvation issues on the"
248 ." host. Using urandom does *not* decrease security in any meaningful way, as it's"
249 ." still seeded from real entropy, and the bytes provided will most likely be mixed"
250 ." with real entropy on the guest as well. '/dev/hwrng' can be used to pass through"
251 ." a hardware RNG from the host.",
252 },
253 max_bytes => {
254 type => 'integer',
255 description => "Maximum bytes of entropy allowed to get injected into the guest every"
256 ." 'period' milliseconds. Prefer a lower value when using '/dev/random' as source. Use"
257 ." `0` to disable limiting (potentially dangerous!).",
258 optional => 1,
259
260 # default is 1 KiB/s, provides enough entropy to the guest to avoid boot-starvation issues
261 # (e.g. systemd etc...) while allowing no chance of overwhelming the host, provided we're
262 # reading from /dev/urandom
263 default => 1024,
264 },
265 period => {
266 type => 'integer',
267 description => "Every 'period' milliseconds the entropy-injection quota is reset, allowing"
268 ." the guest to retrieve another 'max_bytes' of entropy.",
269 optional => 1,
270 default => 1000,
271 },
272};
273
274my $meta_info_fmt = {
275 'ctime' => {
276 type => 'integer',
277 description => "The guest creation timestamp as UNIX epoch time",
278 minimum => 0,
279 optional => 1,
280 },
281 'creation-qemu' => {
282 type => 'string',
283 description => "The QEMU (machine) version from the time this VM was created.",
284 pattern => '\d+(\.\d+)+',
285 optional => 1,
286 },
287};
288
289my $confdesc = {
290 onboot => {
291 optional => 1,
292 type => 'boolean',
293 description => "Specifies whether a VM will be started during system bootup.",
294 default => 0,
295 },
296 autostart => {
297 optional => 1,
298 type => 'boolean',
299 description => "Automatic restart after crash (currently ignored).",
300 default => 0,
301 },
302 hotplug => {
303 optional => 1,
304 type => 'string', format => 'pve-hotplug-features',
305 description => "Selectively enable hotplug features. This is a comma separated list of"
306 ." hotplug features: 'network', 'disk', 'cpu', 'memory' and 'usb'. Use '0' to disable"
307 ." hotplug completely. Using '1' as value is an alias for the default `network,disk,usb`.",
308 default => 'network,disk,usb',
309 },
310 reboot => {
311 optional => 1,
312 type => 'boolean',
313 description => "Allow reboot. If set to '0' the VM exit on reboot.",
314 default => 1,
315 },
316 lock => {
317 optional => 1,
318 type => 'string',
319 description => "Lock/unlock the VM.",
320 enum => [qw(backup clone create migrate rollback snapshot snapshot-delete suspending suspended)],
321 },
322 cpulimit => {
323 optional => 1,
324 type => 'number',
325 description => "Limit of CPU usage.",
326 verbose_description => "Limit of CPU usage.\n\nNOTE: If the computer has 2 CPUs, it has"
327 ." total of '2' CPU time. Value '0' indicates no CPU limit.",
328 minimum => 0,
329 maximum => 128,
330 default => 0,
331 },
332 cpuunits => {
333 optional => 1,
334 type => 'integer',
335 description => "CPU weight for a VM, will be clamped to [1, 10000] in cgroup v2.",
336 verbose_description => "CPU weight for a VM. Argument is used in the kernel fair scheduler."
337 ." The larger the number is, the more CPU time this VM gets. Number is relative to"
338 ." weights of all the other running VMs.",
339 minimum => 2,
340 maximum => 262144,
341 default => 'cgroup v1: 1024, cgroup v2: 100',
342 },
343 memory => {
344 optional => 1,
345 type => 'integer',
346 description => "Amount of RAM for the VM in MB. This is the maximum available memory when"
347 ." you use the balloon device.",
348 minimum => 16,
349 default => 512,
350 },
351 balloon => {
352 optional => 1,
353 type => 'integer',
354 description => "Amount of target RAM for the VM in MB. Using zero disables the ballon driver.",
355 minimum => 0,
356 },
357 shares => {
358 optional => 1,
359 type => 'integer',
360 description => "Amount of memory shares for auto-ballooning. The larger the number is, the"
361 ." more memory this VM gets. Number is relative to weights of all other running VMs."
362 ." Using zero disables auto-ballooning. Auto-ballooning is done by pvestatd.",
363 minimum => 0,
364 maximum => 50000,
365 default => 1000,
366 },
367 keyboard => {
368 optional => 1,
369 type => 'string',
370 description => "Keyboard layout for VNC server. The default is read from the"
371 ."'/etc/pve/datacenter.cfg' configuration file. It should not be necessary to set it.",
372 enum => PVE::Tools::kvmkeymaplist(),
373 default => undef,
374 },
375 name => {
376 optional => 1,
377 type => 'string', format => 'dns-name',
378 description => "Set a name for the VM. Only used on the configuration web interface.",
379 },
380 scsihw => {
381 optional => 1,
382 type => 'string',
383 description => "SCSI controller model",
384 enum => [qw(lsi lsi53c810 virtio-scsi-pci virtio-scsi-single megasas pvscsi)],
385 default => 'lsi',
386 },
387 description => {
388 optional => 1,
389 type => 'string',
390 description => "Description for the VM. Shown in the web-interface VM's summary."
391 ." This is saved as comment inside the configuration file.",
392 maxLength => 1024 * 8,
393 },
394 ostype => {
395 optional => 1,
396 type => 'string',
397 enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 win10 win11 l24 l26 solaris)],
398 description => "Specify guest operating system.",
399 verbose_description => <<EODESC,
400Specify guest operating system. This is used to enable special
401optimization/features for specific operating systems:
402
403[horizontal]
404other;; unspecified OS
405wxp;; Microsoft Windows XP
406w2k;; Microsoft Windows 2000
407w2k3;; Microsoft Windows 2003
408w2k8;; Microsoft Windows 2008
409wvista;; Microsoft Windows Vista
410win7;; Microsoft Windows 7
411win8;; Microsoft Windows 8/2012/2012r2
412win10;; Microsoft Windows 10/2016/2019
413win11;; Microsoft Windows 11/2022
414l24;; Linux 2.4 Kernel
415l26;; Linux 2.6 - 5.X Kernel
416solaris;; Solaris/OpenSolaris/OpenIndiania kernel
417EODESC
418 },
419 boot => {
420 optional => 1,
421 type => 'string', format => 'pve-qm-boot',
422 description => "Specify guest boot order. Use the 'order=' sub-property as usage with no"
423 ." key or 'legacy=' is deprecated.",
424 },
425 bootdisk => {
426 optional => 1,
427 type => 'string', format => 'pve-qm-bootdisk',
428 description => "Enable booting from specified disk. Deprecated: Use 'boot: order=foo;bar' instead.",
429 pattern => '(ide|sata|scsi|virtio)\d+',
430 },
431 smp => {
432 optional => 1,
433 type => 'integer',
434 description => "The number of CPUs. Please use option -sockets instead.",
435 minimum => 1,
436 default => 1,
437 },
438 sockets => {
439 optional => 1,
440 type => 'integer',
441 description => "The number of CPU sockets.",
442 minimum => 1,
443 default => 1,
444 },
445 cores => {
446 optional => 1,
447 type => 'integer',
448 description => "The number of cores per socket.",
449 minimum => 1,
450 default => 1,
451 },
452 numa => {
453 optional => 1,
454 type => 'boolean',
455 description => "Enable/disable NUMA.",
456 default => 0,
457 },
458 hugepages => {
459 optional => 1,
460 type => 'string',
461 description => "Enable/disable hugepages memory.",
462 enum => [qw(any 2 1024)],
463 },
464 keephugepages => {
465 optional => 1,
466 type => 'boolean',
467 default => 0,
468 description => "Use together with hugepages. If enabled, hugepages will not not be deleted"
469 ." after VM shutdown and can be used for subsequent starts.",
470 },
471 vcpus => {
472 optional => 1,
473 type => 'integer',
474 description => "Number of hotplugged vcpus.",
475 minimum => 1,
476 default => 0,
477 },
478 acpi => {
479 optional => 1,
480 type => 'boolean',
481 description => "Enable/disable ACPI.",
482 default => 1,
483 },
484 agent => {
485 optional => 1,
486 description => "Enable/disable communication with the Qemu Guest Agent and its properties.",
487 type => 'string',
488 format => $agent_fmt,
489 },
490 kvm => {
491 optional => 1,
492 type => 'boolean',
493 description => "Enable/disable KVM hardware virtualization.",
494 default => 1,
495 },
496 tdf => {
497 optional => 1,
498 type => 'boolean',
499 description => "Enable/disable time drift fix.",
500 default => 0,
501 },
502 localtime => {
503 optional => 1,
504 type => 'boolean',
505 description => "Set the real time clock (RTC) to local time. This is enabled by default if"
506 ." the `ostype` indicates a Microsoft Windows OS.",
507 },
508 freeze => {
509 optional => 1,
510 type => 'boolean',
511 description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
512 },
513 vga => {
514 optional => 1,
515 type => 'string', format => $vga_fmt,
516 description => "Configure the VGA hardware.",
517 verbose_description => "Configure the VGA Hardware. If you want to use high resolution"
518 ." modes (>= 1280x1024x16) you may need to increase the vga memory option. Since QEMU"
519 ." 2.9 the default VGA display type is 'std' for all OS types besides some Windows"
520 ." versions (XP and older) which use 'cirrus'. The 'qxl' option enables the SPICE"
521 ." display server. For win* OS you can select how many independent displays you want,"
522 ." Linux guests can add displays them self.\nYou can also run without any graphic card,"
523 ." using a serial device as terminal.",
524 },
525 watchdog => {
526 optional => 1,
527 type => 'string', format => 'pve-qm-watchdog',
528 description => "Create a virtual hardware watchdog device.",
529 verbose_description => "Create a virtual hardware watchdog device. Once enabled (by a guest"
530 ." action), the watchdog must be periodically polled by an agent inside the guest or"
531 ." else the watchdog will reset the guest (or execute the respective action specified)",
532 },
533 startdate => {
534 optional => 1,
535 type => 'string',
536 typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
537 description => "Set the initial date of the real time clock. Valid format for date are:"
538 ."'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
539 pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
540 default => 'now',
541 },
542 startup => get_standard_option('pve-startup-order'),
543 template => {
544 optional => 1,
545 type => 'boolean',
546 description => "Enable/disable Template.",
547 default => 0,
548 },
549 args => {
550 optional => 1,
551 type => 'string',
552 description => "Arbitrary arguments passed to kvm.",
553 verbose_description => <<EODESCR,
554Arbitrary arguments passed to kvm, for example:
555
556args: -no-reboot -no-hpet
557
558NOTE: this option is for experts only.
559EODESCR
560 },
561 tablet => {
562 optional => 1,
563 type => 'boolean',
564 default => 1,
565 description => "Enable/disable the USB tablet device.",
566 verbose_description => "Enable/disable the USB tablet device. This device is usually needed"
567 ." to allow absolute mouse positioning with VNC. Else the mouse runs out of sync with"
568 ." normal VNC clients. If you're running lots of console-only guests on one host, you"
569 ." may consider disabling this to save some context switches. This is turned off by"
570 ." default if you use spice (`qm set <vmid> --vga qxl`).",
571 },
572 migrate_speed => {
573 optional => 1,
574 type => 'integer',
575 description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
576 minimum => 0,
577 default => 0,
578 },
579 migrate_downtime => {
580 optional => 1,
581 type => 'number',
582 description => "Set maximum tolerated downtime (in seconds) for migrations.",
583 minimum => 0,
584 default => 0.1,
585 },
586 cdrom => {
587 optional => 1,
588 type => 'string', format => 'pve-qm-ide',
589 typetext => '<volume>',
590 description => "This is an alias for option -ide2",
591 },
592 cpu => {
593 optional => 1,
594 description => "Emulated CPU type.",
595 type => 'string',
596 format => 'pve-vm-cpu-conf',
597 },
598 parent => get_standard_option('pve-snapshot-name', {
599 optional => 1,
600 description => "Parent snapshot name. This is used internally, and should not be modified.",
601 }),
602 snaptime => {
603 optional => 1,
604 description => "Timestamp for snapshots.",
605 type => 'integer',
606 minimum => 0,
607 },
608 vmstate => {
609 optional => 1,
610 type => 'string', format => 'pve-volume-id',
611 description => "Reference to a volume which stores the VM state. This is used internally"
612 ." for snapshots.",
613 },
614 vmstatestorage => get_standard_option('pve-storage-id', {
615 description => "Default storage for VM state volumes/files.",
616 optional => 1,
617 }),
618 runningmachine => get_standard_option('pve-qemu-machine', {
619 description => "Specifies the QEMU machine type of the running vm. This is used internally"
620 ." for snapshots.",
621 }),
622 runningcpu => {
623 description => "Specifies the QEMU '-cpu' parameter of the running vm. This is used"
624 ." internally for snapshots.",
625 optional => 1,
626 type => 'string',
627 pattern => $PVE::QemuServer::CPUConfig::qemu_cmdline_cpu_re,
628 format_description => 'QEMU -cpu parameter'
629 },
630 machine => get_standard_option('pve-qemu-machine'),
631 arch => {
632 description => "Virtual processor architecture. Defaults to the host.",
633 optional => 1,
634 type => 'string',
635 enum => [qw(x86_64 aarch64)],
636 },
637 smbios1 => {
638 description => "Specify SMBIOS type 1 fields.",
639 type => 'string', format => 'pve-qm-smbios1',
640 maxLength => 512,
641 optional => 1,
642 },
643 protection => {
644 optional => 1,
645 type => 'boolean',
646 description => "Sets the protection flag of the VM. This will disable the remove VM and"
647 ." remove disk operations.",
648 default => 0,
649 },
650 bios => {
651 optional => 1,
652 type => 'string',
653 enum => [ qw(seabios ovmf) ],
654 description => "Select BIOS implementation.",
655 default => 'seabios',
656 },
657 vmgenid => {
658 type => 'string',
659 pattern => '(?:[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}|[01])',
660 format_description => 'UUID',
661 description => "Set VM Generation ID. Use '1' to autogenerate on create or update, pass '0'"
662 ." to disable explicitly.",
663 verbose_description => "The VM generation ID (vmgenid) device exposes a 128-bit integer"
664 ." value identifier to the guest OS. This allows to notify the guest operating system"
665 ." when the virtual machine is executed with a different configuration (e.g. snapshot"
666 ." execution or creation from a template). The guest operating system notices the"
667 ." change, and is then able to react as appropriate by marking its copies of"
668 ." distributed databases as dirty, re-initializing its random number generator, etc.\n"
669 ."Note that auto-creation only works when done through API/CLI create or update methods"
670 .", but not when manually editing the config file.",
671 default => "1 (autogenerated)",
672 optional => 1,
673 },
674 hookscript => {
675 type => 'string',
676 format => 'pve-volume-id',
677 optional => 1,
678 description => "Script that will be executed during various steps in the vms lifetime.",
679 },
680 ivshmem => {
681 type => 'string',
682 format => $ivshmem_fmt,
683 description => "Inter-VM shared memory. Useful for direct communication between VMs, or to"
684 ." the host.",
685 optional => 1,
686 },
687 audio0 => {
688 type => 'string',
689 format => $audio_fmt,
690 description => "Configure a audio device, useful in combination with QXL/Spice.",
691 optional => 1
692 },
693 spice_enhancements => {
694 type => 'string',
695 format => $spice_enhancements_fmt,
696 description => "Configure additional enhancements for SPICE.",
697 optional => 1
698 },
699 tags => {
700 type => 'string', format => 'pve-tag-list',
701 description => 'Tags of the VM. This is only meta information.',
702 optional => 1,
703 },
704 rng0 => {
705 type => 'string',
706 format => $rng_fmt,
707 description => "Configure a VirtIO-based Random Number Generator.",
708 optional => 1,
709 },
710 meta => {
711 type => 'string',
712 format => $meta_info_fmt,
713 description => "Some (read-only) meta-information about this guest.",
714 optional => 1,
715 },
716};
717
# Property-string format for the `cicustom` option. Each key (meta, network,
# user, vendor) is optional and takes a volume ID ('pve-volume-id') pointing to
# a custom file with the respective cloud-init data.
my $cicustom_fmt = {
    meta => {
        type => 'string',
        optional => 1,
        # Both parts use single quotes. Mixing ' and " across the line break would
        # make the concatenation operator and the newline part of the text itself.
        description => 'Specify a custom file containing all meta data passed to the VM via'
            .' cloud-init. This is provider specific meaning configdrive2 and nocloud differ.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    network => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all network data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    user => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all user data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    vendor => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all vendor data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
};
# Make the format available under the name referenced by the `cicustom` option.
PVE::JSONSchema::register_format('pve-qm-cicustom', $cicustom_fmt);
753
# Schema entries for the cloud-init related VM options. The `ipconfigN` entries
# are added to this hash later, and the whole hash is then merged into $confdesc.
my $confdesc_cloudinit = {
    citype => {
        optional => 1,
        type => 'string',
        # closing parenthesis after `ostype` added so the sentence is well-formed
        description => 'Specifies the cloud-init configuration format. The default depends on the'
            .' configured operating system type (`ostype`). We use the `nocloud` format for Linux,'
            .' and `configdrive2` for windows.',
        enum => ['configdrive2', 'nocloud', 'opennebula'],
    },
    ciuser => {
        optional => 1,
        type => 'string',
        description => "cloud-init: User name to change ssh keys and password for instead of the"
            ." image's configured default user.",
    },
    cipassword => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Password to assign the user. Using this is generally not'
            .' recommended. Use ssh keys instead. Also note that older cloud-init versions do not'
            .' support hashed passwords.',
    },
    cicustom => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Specify custom files to replace the automatically generated'
            .' ones at start.',
        format => 'pve-qm-cicustom',
    },
    searchdomain => {
        optional => 1,
        type => 'string',
        # Consistently single-quoted: with a "..." outer literal the inner '.' pieces
        # and the line breaks would end up verbatim in the description.
        description => 'cloud-init: Sets DNS search domains for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    nameserver => {
        optional => 1,
        type => 'string', format => 'address-list',
        # same quoting rule as for `searchdomain`
        description => 'cloud-init: Sets DNS server IP address for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    sshkeys => {
        optional => 1,
        type => 'string',
        format => 'urlencoded',
        description => "cloud-init: Setup public SSH keys (one key per line, OpenSSH format).",
    },
};
804
805# what about other qemu settings ?
806#cpu => 'string',
807#machine => 'string',
808#fda => 'file',
809#fdb => 'file',
810#mtdblock => 'file',
811#sd => 'file',
812#pflash => 'file',
813#snapshot => 'bool',
814#bootp => 'file',
815##tftp => 'dir',
816##smb => 'dir',
817#kernel => 'file',
818#append => 'string',
819#initrd => 'file',
820##soundhw => 'string',
821
# Expose every option defined in $confdesc so far as a standard option named
# "pve-qm-<key>". Entries added to $confdesc further down in the file are not
# covered by this loop.
for my $opt (keys %$confdesc) {
    PVE::JSONSchema::register_standard_option("pve-qm-$opt", $confdesc->{$opt});
}
825
826my $MAX_USB_DEVICES = 5;
827my $MAX_NETS = 32;
828my $MAX_SERIAL_PORTS = 4;
829my $MAX_PARALLEL_PORTS = 3;
830my $MAX_NUMA = 8;
831
832my $numa_fmt = {
833 cpus => {
834 type => "string",
835 pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
836 description => "CPUs accessing this NUMA node.",
837 format_description => "id[-id];...",
838 },
839 memory => {
840 type => "number",
841 description => "Amount of memory this NUMA node provides.",
842 optional => 1,
843 },
844 hostnodes => {
845 type => "string",
846 pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
847 description => "Host NUMA nodes to use.",
848 format_description => "id[-id];...",
849 optional => 1,
850 },
851 policy => {
852 type => 'string',
853 enum => [qw(preferred bind interleave)],
854 description => "NUMA allocation policy.",
855 optional => 1,
856 },
857};
858PVE::JSONSchema::register_format('pve-qm-numanode', $numa_fmt);
859my $numadesc = {
860 optional => 1,
861 type => 'string', format => $numa_fmt,
862 description => "NUMA topology.",
863};
864PVE::JSONSchema::register_standard_option("pve-qm-numanode", $numadesc);
865
# Add the options numa0 .. numa($MAX_NUMA - 1); all of them share the same
# $numadesc hash reference.
$confdesc->{"numa$_"} = $numadesc for 0 .. $MAX_NUMA - 1;
869
870my $nic_model_list = [
871 'e1000',
872 'e1000-82540em',
873 'e1000-82544gc',
874 'e1000-82545em',
875 'e1000e',
876 'i82551',
877 'i82557b',
878 'i82559er',
879 'ne2k_isa',
880 'ne2k_pci',
881 'pcnet',
882 'rtl8139',
883 'virtio',
884 'vmxnet3',
885];
886my $nic_model_list_txt = join(' ', sort @$nic_model_list);
887
888my $net_fmt_bridge_descr = <<__EOD__;
889Bridge to attach the network device to. The Proxmox VE standard bridge
890is called 'vmbr0'.
891
892If you do not specify a bridge, we create a kvm user (NATed) network
893device, which provides DHCP and DNS services. The following addresses
894are used:
895
896 10.0.2.2 Gateway
897 10.0.2.3 DNS Server
898 10.0.2.4 SMB Server
899
900The DHCP server assign addresses to the guest starting from 10.0.2.15.
901__EOD__
902
# Property-string format of the `netN` options. `model` is the default key.
# Each model name from $nic_model_list is additionally accepted as a key
# aliasing `macaddr`, with the key name itself stored as `model` (keyAlias).
my $net_fmt = {
    macaddr => get_standard_option('mac-addr', {
        description => "MAC address. That address must be unique within your network. This is"
            ." automatically generated if not specified.",
    }),
    model => {
        type => 'string',
        description => "Network Card Model. The 'virtio' model provides the best performance with"
            ." very low CPU overhead. If your guest does not support this driver, it is usually"
            ." best to use 'e1000'.",
        enum => $nic_model_list,
        default_key => 1,
    },
    (map { $_ => { keyAlias => 'model', alias => 'macaddr' }} @$nic_model_list),
    bridge => get_standard_option('pve-bridge-id', {
        description => $net_fmt_bridge_descr,
        optional => 1,
    }),
    queues => {
        type => 'integer',
        minimum => 0, maximum => 16,
        description => 'Number of packet queues to be used on the device.',
        optional => 1,
    },
    rate => {
        type => 'number',
        minimum => 0,
        description => "Rate limit in mbps (megabytes per second) as floating point number.",
        optional => 1,
    },
    tag => {
        type => 'integer',
        minimum => 1, maximum => 4094,
        description => 'VLAN tag to apply to packets on this interface.',
        optional => 1,
    },
    trunks => {
        type => 'string',
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        description => 'VLAN trunks to pass through this interface.',
        format_description => 'vlanid[;vlanid...]',
        optional => 1,
    },
    firewall => {
        type => 'boolean',
        description => 'Whether this interface should be protected by the firewall.',
        optional => 1,
    },
    link_down => {
        type => 'boolean',
        description => 'Whether this interface should be disconnected (like pulling the plug).',
        optional => 1,
    },
    mtu => {
        type => 'integer',
        minimum => 1, maximum => 65520,
        description => "Force MTU, for VirtIO only. Set to '1' to use the bridge MTU",
        optional => 1,
    },
};
963
964my $netdesc = {
965 optional => 1,
966 type => 'string', format => $net_fmt,
967 description => "Specify network devices.",
968};
969
970PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
971
972my $ipconfig_fmt = {
973 ip => {
974 type => 'string',
975 format => 'pve-ipv4-config',
976 format_description => 'IPv4Format/CIDR',
977 description => 'IPv4 address in CIDR format.',
978 optional => 1,
979 default => 'dhcp',
980 },
981 gw => {
982 type => 'string',
983 format => 'ipv4',
984 format_description => 'GatewayIPv4',
985 description => 'Default gateway for IPv4 traffic.',
986 optional => 1,
987 requires => 'ip',
988 },
989 ip6 => {
990 type => 'string',
991 format => 'pve-ipv6-config',
992 format_description => 'IPv6Format/CIDR',
993 description => 'IPv6 address in CIDR format.',
994 optional => 1,
995 default => 'dhcp',
996 },
997 gw6 => {
998 type => 'string',
999 format => 'ipv6',
1000 format_description => 'GatewayIPv6',
1001 description => 'Default gateway for IPv6 traffic.',
1002 optional => 1,
1003 requires => 'ip6',
1004 },
1005};
1006PVE::JSONSchema::register_format('pve-qm-ipconfig', $ipconfig_fmt);
# Schema entry used for the cloud-init 'ipconfigN' config keys: an optional
# property string validated against the registered 'pve-qm-ipconfig' format.
my $ipconfigdesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-ipconfig',
    description => <<'EODESCR',
cloud-init: Specify IP addresses and gateways for the corresponding interface.

IP addresses use CIDR notation, gateways are optional but need an IP of the same type specified.

The special string 'dhcp' can be used for IP addresses to use DHCP, in which case no explicit
gateway should be provided.
For IPv6 the special string 'auto' can be used to use stateless autoconfiguration. This requires
cloud-init 19.4 or newer.

If cloud-init is enabled and neither an IPv4 nor an IPv6 address is specified, it defaults to using
dhcp on IPv4.
EODESCR
};

# Publish the ipconfig description as a standard option. This used to register
# $netdesc (the 'netN' description) under the ipconfig name by mistake.
PVE::JSONSchema::register_standard_option("pve-qm-ipconfig", $ipconfigdesc);
1025
# Add one 'netN' option to the VM config schema and one matching 'ipconfigN'
# option to the cloud-init schema for each index below $MAX_NETS.
for my $i (0 .. $MAX_NETS - 1) {
    $confdesc->{"net$i"} = $netdesc;
    $confdesc_cloudinit->{"ipconfig$i"} = $ipconfigdesc;
}

# Copy every cloud-init option into the main VM config schema.
$confdesc->{$_} = $confdesc_cloudinit->{$_} for keys %$confdesc_cloudinit;
1034
PVE::JSONSchema::register_format('pve-volume-id-or-qm-path', \&verify_volume_id_or_qm_path);

# Format verifier: accepts the literals 'none' and 'cdrom', any value starting
# with '/', or anything that passes the 'pve-volume-id' format check.
#
# Returns the accepted value. On failure it returns nothing if $noerr is set
# and re-throws the format check error otherwise.
sub verify_volume_id_or_qm_path {
    my ($volid, $noerr) = @_;

    return $volid if $volid eq 'none' || $volid eq 'cdrom' || $volid =~ m|^/|;

    # if its neither 'none' nor 'cdrom' nor a path, check if its a volume-id
    my $checked = eval { PVE::JSONSchema::check_format('pve-volume-id', $volid, '') };
    if (my $err = $@) {
        die $err if !$noerr;
        return;
    }

    return $checked;
}
1051
# Property-string format of the 'usbN' options. 'host' is the default key and
# names either a host USB device/port or the literal 'spice'.
my $usb_fmt = {
    host => {
        default_key => 1,
        type => 'string', format => 'pve-qm-usb-device',
        format_description => 'HOSTUSBDEVICE|spice',
        description => <<EODESCR,
The Host USB device or port or the value 'spice'. HOSTUSBDEVICE syntax is:

 'bus-port(.port)*' (decimal numbers) or
 'vendor_id:product_id' (hexadecimal numbers) or
 'spice'

You can use the 'lsusb -t' command to list existing usb devices.

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

The value 'spice' can be used to add a usb redirection device for spice.
EODESCR
    },
    usb3 => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether the given host option is a USB3 device or port.",
        default => 0,
    },
};

# Schema entry used for the 'usbN' config keys, also published as the
# standard option 'pve-qm-usb'.
my $usbdesc = {
    optional => 1,
    type => 'string', format => $usb_fmt,
    description => "Configure an USB device (n is 0 to 4).",
};
PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc);
1086
# Schema entry used for the 'serialN' config keys: the value is either a path
# below /dev/ or the literal 'socket'.
my $serialdesc = {
    type => 'string',
    pattern => '(/dev/.+|socket)',
    optional => 1,
    description => "Create a serial device inside the VM (n is 0 to 3)",
    verbose_description => <<EODESCR,
Create a serial device inside the VM (n is 0 to 3), and pass through a
host serial device (i.e. /dev/ttyS0), or create a unix socket on the
host side (use 'qm terminal' to open a terminal connection).

NOTE: If you pass through a host serial device, it is no longer possible to migrate such machines -
use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1103
# Schema entry used for the 'parallelN' config keys: the value must name a
# /dev/parportN or /dev/usb/lpN device.
my $paralleldesc = {
    type => 'string',
    pattern => '/dev/parport\d+|/dev/usb/lp\d+',
    optional => 1,
    description => "Map host parallel devices (n is 0 to 2).",
    verbose_description => <<EODESCR,
Map host parallel devices (n is 0 to 2).

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1118
# Add the indexed device options (parallelN, serialN, hostpciN, usbN) and all
# drive options to the VM config schema.
$confdesc->{"parallel$_"} = $paralleldesc for 0 .. $MAX_PARALLEL_PORTS - 1;

$confdesc->{"serial$_"} = $serialdesc for 0 .. $MAX_SERIAL_PORTS - 1;

for my $i (0 .. $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES - 1) {
    $confdesc->{"hostpci$i"} = $PVE::QemuServer::PCI::hostpcidesc;
}

# The drive descriptions are taken as-is from PVE::QemuServer::Drive.
foreach my $drive_key (keys %{$PVE::QemuServer::Drive::drivedesc_hash}) {
    $confdesc->{$drive_key} = $PVE::QemuServer::Drive::drivedesc_hash->{$drive_key};
}

$confdesc->{"usb$_"} = $usbdesc for 0 .. $MAX_USB_DEVICES - 1;
1138
# Property-string format of the 'boot' option. 'legacy' is the default key;
# 'order' takes a list checked by the 'pve-qm-bootdev-list' format.
my $boot_fmt = {
    legacy => {
        type => 'string',
        pattern => '[acdn]{1,4}',
        format_description => "[acdn]{1,4}",
        description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n)."
            . " Deprecated, use 'order=' instead.",
        default_key => 1,
        optional => 1,

        # note: this is also the fallback if boot: is not given at all
        default => 'cdn',
    },
    order => {
        type => 'string',
        format => 'pve-qm-bootdev-list',
        format_description => "device[;device...]",
        optional => 1,
        description => <<EODESC,
The guest will attempt to boot from devices in the order they appear here.

Disks, optical drives and passed-through storage USB devices will be directly
booted from, NICs will load PXE, and PCIe devices will either behave like disks
(e.g. NVMe) or load an option ROM (e.g. RAID controller, hardware NIC).

Note that only devices in this list will be marked as bootable and thus loaded
by the guest firmware (BIOS/UEFI). If you require multiple disks for booting
(e.g. software-raid), you need to specify all of them here.

Overrides the deprecated 'legacy=[acdn]*' value when given.
EODESC
    },
};
PVE::JSONSchema::register_format('pve-qm-boot', $boot_fmt);
1173
PVE::JSONSchema::register_format('pve-qm-bootdev', \&verify_bootdev);

# Format verifier for a single boot device name.
#
# Accepted are valid drive names (except those starting with 'efidisk' or
# 'tpmstate') and 'netN', 'usbN' or 'hostpciN' names that exist as keys in
# $confdesc. Returns $dev when accepted; otherwise returns nothing if $noerr
# is set and dies if not.
sub verify_bootdev {
    my ($dev, $noerr) = @_;

    my $is_special = $dev =~ m/^efidisk/ || $dev =~ m/^tpmstate/;
    if (PVE::QemuServer::Drive::is_valid_drivename($dev) && !$is_special) {
        return $dev;
    }

    for my $base (qw(net usb hostpci)) {
        return $dev if $dev =~ m/^$base\d+$/ && $confdesc->{$dev};
    }

    die "invalid boot device '$dev'\n" if !$noerr;
    return;
}
1195
# Render a list of boot devices as a 'boot' property string of the form
# 'order=dev1;dev2;...'. An empty list yields the empty string.
sub print_bootorder {
    my ($devs) = @_;

    if (scalar(@$devs) == 0) {
        return "";
    }

    my $order = join(';', @$devs);
    return PVE::JSONSchema::print_property_string({ order => $order }, $boot_fmt);
}
1202
# Cached result of the KVM_GET_API_VERSION ioctl; 0 means "not queried yet".
my $kvm_api_version = 0;

# Return the KVM API version reported by /dev/kvm. The value is cached after
# the first successful query. Returns nothing if /dev/kvm cannot be opened.
sub kvm_version {
    if (!$kvm_api_version) {
        open(my $kvm_fh, '<', '/dev/kvm') or return;

        # 0xae00 => KVM_GET_API_VERSION
        $kvm_api_version = ioctl($kvm_fh, 0xae00, 0);
        close($kvm_fh);
    }

    return $kvm_api_version;
}
1216
# Per-binary caches: the parsed version string and the binary's mtime at the
# time the version was read.
my $kvm_user_version = {};
my $kvm_mtime = {};

# Return the version printed by '$binary --version'. The result is cached per
# binary and re-read when the binary's mtime changes. 'unknown' is returned
# if no version line could be parsed.
sub kvm_user_version {
    my ($binary) = @_;

    $binary //= get_command_for_arch(get_host_arch()); # get the native arch by default
    my $st = stat($binary);

    my $known_version = $kvm_user_version->{$binary};
    my $known_mtime = $kvm_mtime->{$binary} // -1;
    if ($known_version && $known_mtime == $st->mtime) {
        return $known_version;
    }

    # start from 'unknown', the output parser below overrides it on a match
    $kvm_user_version->{$binary} = 'unknown';
    $kvm_mtime->{$binary} = $st->mtime;

    my $parse_version_line = sub {
        my ($line) = @_;
        return if $line !~ m/^QEMU( PC)? emulator version (\d+\.\d+(\.\d+)?)(\.\d+)?[,\s]/;
        $kvm_user_version->{$binary} = $2;
    };

    eval { run_command([$binary, '--version'], outfunc => $parse_version_line); };
    warn $@ if $@;

    return $kvm_user_version->{$binary};
}
# Wrapper around PVE::QemuServer::Machine::extract_version() that falls back
# to the installed binary's version when no $version is passed.
my sub extract_version {
    my ($machine_type, $version) = @_;

    $version //= kvm_user_version();

    return PVE::QemuServer::Machine::extract_version($machine_type, $version);
}
1251
# True if /dev/vhost-net exists and is a character device.
sub kernel_has_vhost_net {
    my $vhost_dev = '/dev/vhost-net';
    return -c $vhost_dev;
}
1255
# True if $key is a known option in the VM config schema ($confdesc).
sub option_exists {
    my ($key) = @_;

    my $schema = $confdesc->{$key};
    return defined($schema);
}
1260
# Cached path of the host cdrom symlink, set on the first successful lookup.
my $cdrom_path;

# Return the first of /dev/cdrom, /dev/cdrom1 and /dev/cdrom2 that is a
# symlink. A hit is cached for later calls. Returns nothing if none of the
# candidates is a symlink (previously the sub fell off the end and implicitly
# returned the result of the last file test).
sub get_cdrom_path {
    return $cdrom_path if $cdrom_path;

    for my $candidate (qw(/dev/cdrom /dev/cdrom1 /dev/cdrom2)) {
        return $cdrom_path = $candidate if -l $candidate;
    }

    return;
}
1270
# Map the 'file' value of a cdrom drive to a path:
#   'cdrom'       -> the host cdrom symlink from get_cdrom_path()
#   'none'        -> empty string
#   absolute path -> returned unchanged
#   anything else -> resolved through PVE::Storage::path()
# $vmid is accepted but not used here.
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';
    return '' if $cdrom eq 'none';
    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path($storecfg, $cdrom);
}
1284
# try to convert old style file names to volume IDs
#
# 'none', 'cdrom', /dev/ paths and values of the form 'x:y' are returned
# unchanged. Any other value containing a '/' yields nothing. A remaining
# plain file name becomes 'local:iso/<file>' for cdrom media and
# 'local:<vmid>/<file>' otherwise.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    return $file if $file eq 'none' || $file eq 'cdrom';
    return $file if $file =~ m|^/dev/.+| || $file =~ m/^([^:]+):(.+)$/;

    return if $file =~ m|/|;

    my $is_cdrom = $media && $media eq 'cdrom';
    return $is_cdrom ? "local:iso/$file" : "local:$vmid/$file";
}
1303
# Check that the content type $vtype fits the drive's media setting:
# media 'disk' expects 'images', media 'cdrom' expects 'iso'.
#
# Does nothing if $media is not set. Dies with "internal error" for any other
# media value and raises a parameter exception for $opt on a mismatch.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my %content_type_for = (
        disk => 'images',
        cdrom => 'iso',
    );
    my $etype = $content_type_for{$media} // die "internal error";

    return if $vtype eq $etype;

    raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
}
1322
# Normalize $drive->{file} and $drive->{media} in place.
#
# A file value that is not 'cdrom'/'none', not a /dev/ path, not of the form
# 'x:y' and not purely numeric is looked up with
# PVE::Storage::path_to_volume_id() and replaced by the resulting volume ID.
# A parameter exception for $opt is raised if no storage matches.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    # try to convert filesystem paths to volume IDs

    my $file = $drive->{file};
    my $needs_lookup = !(
        $file =~ m/^(cdrom|none)$/
        || $file =~ m|^/dev/.+|
        || $file =~ m/^([^:]+):(.+)$/
        || $file =~ m/^\d+$/
    );

    if ($needs_lookup) {
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $file);
        if (!$vtype) {
            raise_param_exc({ $opt => "unable to associate path '$file' to any storage"});
        }
        # an iso volume without explicit media setting is treated as cdrom
        if (!$drive->{media} && $vtype eq 'iso') {
            $drive->{media} = 'cdrom';
        }
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    # 'cdrom' and 'none' imply cdrom media unless media was set explicitly
    $drive->{media} = 'cdrom' if !$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/;
}
1342
# Parse the 'hotplug' option into a hash reference { feature => 1, ... }.
#
# '0' yields an empty hash and '1' is replaced by the schema default before
# parsing. Dies on any feature other than network, disk, cpu, memory or usb.
sub parse_hotplug_features {
    my ($data) = @_;

    return {} if $data eq '0';

    if ($data eq '1') {
        $data = $confdesc->{hotplug}->{default};
    }

    my $enabled = {};
    for my $feature (PVE::Tools::split_list($data)) {
        die "invalid hotplug feature '$feature'\n"
            if $feature !~ m/^(network|disk|cpu|memory|usb)$/;
        $enabled->{$1} = 1;
    }

    return $enabled;
}
1361
PVE::JSONSchema::register_format('pve-hotplug-features', \&pve_verify_hotplug_features);

# Format verifier for the 'hotplug' option.
#
# Returns $value if parse_hotplug_features() accepts it. On failure it
# returns nothing if $noerr is set; otherwise it dies with the parser's error
# message. The parser's die used to propagate even with $noerr set, so the
# $noerr branch could never be reached.
sub pve_verify_hotplug_features {
    my ($value, $noerr) = @_;

    my $features = eval { parse_hotplug_features($value) };
    my $err = $@;

    return $value if !$err && $features;

    return if $noerr;

    # keep the detailed parser message for callers that want the error
    die $err if $err;
    die "unable to parse hotplug option\n";
}
1372
# Query a device through the SCSI generic (sg) ioctl interface and decode the
# response.
#
# Parameters:
#   $fh    - open file handle of the device to query
#   $noerr - if true, failures return nothing instead of dying
#
# Returns a hash reference with the keys 'type', 'removable', 'vendor',
# 'product' and 'revision'.
sub scsi_inquiry {
    my($fh, $noerr) = @_;

    my $SG_IO = 0x2285;
    my $SG_GET_VERSION_NUM = 0x2282;

    my $versionbuf = "\x00" x 8;
    my $ret = ioctl($fh, $SG_GET_VERSION_NUM, $versionbuf);
    if (!$ret) {
        die "scsi ioctl SG_GET_VERSION_NUM failed - $!\n" if !$noerr;
        return;
    }
    my $version = unpack("I", $versionbuf);
    if ($version < 30000) {
        die "scsi generic interface too old\n" if !$noerr;
        return;
    }

    # response buffer, sense buffer and the 6 byte command: opcode 0x12 and a
    # length byte of 36, which matches the size of $buf
    my $buf = "\x00" x 36;
    my $sensebuf = "\x00" x 8;
    my $cmd = pack("C x3 C x1", 0x12, 36);

    # see /usr/include/scsi/sg.h
    my $sg_io_hdr_t = "i i C C s I P P P I I i P C C C C S S i I I";

    # only the leading header fields are passed; the value 6000 lands in the
    # tenth field (presumably the timeout in ms - confirm against sg.h)
    my $packet = pack(
        $sg_io_hdr_t, ord('S'), -3, length($cmd), length($sensebuf), 0, length($buf), $buf, $cmd, $sensebuf, 6000
    );

    $ret = ioctl($fh, $SG_IO, $packet);
    if (!$ret) {
        die "scsi ioctl SG_IO failed - $!\n" if !$noerr;
        return;
    }

    # fields 17 and 18 are the two 'S' entries of the header template
    # (presumably host_status and driver_status - confirm against sg.h)
    my @res = unpack($sg_io_hdr_t, $packet);
    if ($res[17] || $res[18]) {
        die "scsi ioctl SG_IO status error - $!\n" if !$noerr;
        return;
    }

    my $res = {};
    $res->@{qw(type removable vendor product revision)} = unpack("C C x6 A8 A16 A4", $buf);

    # reduce 'removable' to a 0/1 flag (bit 7) and 'type' to its low 5 bits
    $res->{removable} = $res->{removable} & 128 ? 1 : 0;
    $res->{type} &= 0x1F;

    return $res;
}
1422
# Open $path read/write and run scsi_inquiry() on it without raising errors.
#
# Returns the inquiry result hash, or nothing if the path cannot be opened or
# the inquiry fails.
sub path_is_scsi {
    my ($path) = @_;

    # pass the mode separately so that the path is never parsed as part of
    # the open mode (the old "+<$path" form had 2-arg open semantics)
    my $fh = IO::File->new($path, '+<') || return;
    my $res = scsi_inquiry($fh, 1);
    close($fh);

    return $res;
}
1432
# Build the '-device' argument for the USB tablet.
#
# The tablet is attached to the 'ehci' bus on q35 machines and on aarch64,
# and to 'uhci' otherwise.
sub print_tabletdevice_full {
    my ($conf, $arch) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # we use uhci for old VMs because tablet driver was buggy in older qemu
    my $usbbus = ($q35 || $arch eq 'aarch64') ? 'ehci' : 'uhci';

    return "usb-tablet,id=tablet,bus=$usbbus.0,port=1";
}
1448
# Build the '-device' argument for the USB keyboard. Only aarch64 gets one;
# for every other architecture nothing is returned. $conf is not used.
sub print_keyboarddevice_full {
    my ($conf, $arch) = @_;

    if ($arch eq 'aarch64') {
        return "usb-kbd,id=keyboard,bus=ehci.0,port=2";
    }

    return;
}
1456
# Drive identifier: interface name directly followed by the drive index,
# e.g. 'scsi0'.
my sub get_drive_id {
    my ($drive) = @_;

    return $drive->{interface} . $drive->{index};
}
1461
# Build the '-device' argument for a drive.
#
# Parameters:
#   $storecfg     - storage configuration, used to resolve volume IDs
#   $conf         - VM config (read for 'scsihw')
#   $vmid         - not used here
#   $drive        - parsed drive hash (interface, index, file, media, ...)
#   $bridges, $arch, $machine_type - passed on to print_pci_addr()
#
# Returns the device string. Dies for the 'usb' interface (not implemented)
# and for any interface other than virtio, scsi, ide and sata.
sub print_drivedevice_full {
    my ($storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type) = @_;

    my $device = '';

    my $drive_id = get_drive_id($drive);
    if ($drive->{interface} eq 'virtio') {
        my $pciaddr = print_pci_addr($drive_id, $bridges, $arch, $machine_type);
        $device = "virtio-blk-pci,drive=drive-$drive_id,id=${drive_id}${pciaddr}";
        $device .= ",iothread=iothread-$drive_id" if $drive->{iothread};
    } elsif ($drive->{interface} eq 'scsi') {

        my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = 'hd';
        my $path = '';
        if (drive_is_cdrom($drive)) {
            $devicetype = 'cd';
        } else {
            if ($drive->{file} =~ m|^/|) {
                # absolute path: probe the device to pick the scsi device type
                $path = $drive->{file};
                if (my $info = path_is_scsi($path)) {
                    if ($info->{type} == 0 && $drive->{scsiblock}) {
                        $devicetype = 'block';
                    } elsif ($info->{type} == 1) { # tape
                        $devicetype = 'generic';
                    }
                }
            } else {
                $path = PVE::Storage::path($storecfg, $drive->{file});
            }

            # for compatibility only, we prefer scsi-hd (#2408, #2355, #2380)
            my $version = extract_version($machine_type, kvm_user_version());
            if ($path =~ m/^iscsi\:\/\// &&
               !min_version($version, 4, 1)) {
                $devicetype = 'generic';
            }
        }

        # lsi (the default) and pvscsi address drives by scsi-id, all other
        # controllers by lun
        if (!$conf->{scsihw} || $conf->{scsihw} =~ m/^lsi/ || $conf->{scsihw} eq 'pvscsi') {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,scsi-id=$unit";
        } else {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,channel=0,scsi-id=0"
                .",lun=$drive->{index}";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($drive->{ssd} && ($devicetype eq 'block' || $devicetype eq 'hd')) {
            $device .= ",rotation_rate=1";
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};

    } elsif ($drive->{interface} eq 'ide' || $drive->{interface} eq 'sata') {
        my $maxdev = ($drive->{interface} eq 'sata') ? $PVE::QemuServer::Drive::MAX_SATA_DISKS : 2;
        my $controller = int($drive->{index} / $maxdev);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = ($drive->{media} && $drive->{media} eq 'cdrom') ? "cd" : "hd";

        $device = "ide-$devicetype";
        if ($drive->{interface} eq 'ide') {
            $device .= ",bus=ide.$controller,unit=$unit";
        } else {
            $device .= ",bus=ahci$controller.$unit";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($devicetype eq 'hd') {
            if (my $model = $drive->{model}) {
                # the model is URL-unescaped before use
                $model = URI::Escape::uri_unescape($model);
                $device .= ",model=$model";
            }
            if ($drive->{ssd}) {
                $device .= ",rotation_rate=1";
            }
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};
    } elsif ($drive->{interface} eq 'usb') {
        die "implement me";
        # -device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0
    } else {
        die "unsupported interface type";
    }

    $device .= ",bootindex=$drive->{bootindex}" if $drive->{bootindex};

    if (my $serial = $drive->{serial}) {
        # the serial is URL-unescaped before use
        $serial = URI::Escape::uri_unescape($serial);
        $device .= ",serial=$serial";
    }

    return $device;
}
1557
# Read the value of the first 'InitiatorName=' line from
# /etc/iscsi/initiatorname.iscsi. Returns nothing if the file cannot be
# opened and undef if no such line exists.
sub get_initiator_name {
    my $fh = IO::File->new('/etc/iscsi/initiatorname.iscsi') || return;

    my $initiator;
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\s*InitiatorName\s*=\s*([\.\-:\w]+)/) {
            $initiator = $1;
            last;
        }
    }
    $fh->close();

    return $initiator;
}
1571
# Build the '-drive' argument for a drive.
#
# Parameters:
#   $storecfg - storage configuration, used to resolve volume IDs
#   $vmid     - passed on to get_iso_path()
#   $drive    - parsed drive hash
#   $pbs_name - optional node name of a PBS backing block device; if set, the
#               drive is set up with the 'alloc-track' format on top of it
#   $io_uring - if true, 'aio=io_uring' is used as the default aio mode where
#               it is not excluded below
#
# Returns the drive string. Dies if a cdrom drive is combined with $pbs_name
# or if $pbs_name is set and no format is known.
sub print_drive_commandline_full {
    my ($storecfg, $vmid, $drive, $pbs_name, $io_uring) = @_;

    my $path;
    my $volid = $drive->{file};
    my $format = $drive->{format};
    my $drive_id = get_drive_id($drive);

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
    my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;

    if (drive_is_cdrom($drive)) {
        $path = get_iso_path($storecfg, $vmid, $volid);
        die "$drive_id: cannot back cdrom drive with PBS snapshot\n" if $pbs_name;
    } else {
        if ($storeid) {
            $path = PVE::Storage::path($storecfg, $volid);
            $format //= qemu_img_format($scfg, $volname);
        } else {
            $path = $volid;
            $format //= "raw";
        }
    }

    my $is_rbd = $path =~ m/^rbd:/;

    # options that are passed through under the same name
    my $opts = '';
    my @qemu_drive_options = qw(heads secs cyls trans media cache rerror werror aio discard);
    foreach my $o (@qemu_drive_options) {
        $opts .= ",$o=$drive->{$o}" if defined($drive->{$o});
    }

    # snapshot only accepts on|off
    if (defined($drive->{snapshot})) {
        my $v = $drive->{snapshot} ? 'on' : 'off';
        $opts .= ",snapshot=$v";
    }

    if (defined($drive->{ro})) { # ro maps to QEMUs `readonly`, which accepts `on` or `off` only
        $opts .= ",readonly=" . ($drive->{ro} ? 'on' : 'off');
    }

    # bandwidth and IOPS limits: total, read and write variants; the mbps
    # values are converted to bytes per second
    foreach my $type (['', '-total'], [_rd => '-read'], [_wr => '-write']) {
        my ($dir, $qmpname) = @$type;
        if (my $v = $drive->{"mbps$dir"}) {
            $opts .= ",throttling.bps$qmpname=".int($v*1024*1024);
        }
        if (my $v = $drive->{"mbps${dir}_max"}) {
            $opts .= ",throttling.bps$qmpname-max=".int($v*1024*1024);
        }
        if (my $v = $drive->{"bps${dir}_max_length"}) {
            $opts .= ",throttling.bps$qmpname-max-length=$v";
        }
        if (my $v = $drive->{"iops${dir}"}) {
            $opts .= ",throttling.iops$qmpname=$v";
        }
        if (my $v = $drive->{"iops${dir}_max"}) {
            $opts .= ",throttling.iops$qmpname-max=$v";
        }
        if (my $v = $drive->{"iops${dir}_max_length"}) {
            $opts .= ",throttling.iops$qmpname-max-length=$v";
        }
    }

    if ($pbs_name) {
        $format = "rbd" if $is_rbd;
        die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n"
            if !$format;
        $opts .= ",format=alloc-track,file.driver=$format";
    } elsif ($format) {
        $opts .= ",format=$format";
    }

    my $cache_direct = 0;

    if (my $cache = $drive->{cache}) {
        $cache_direct = $cache =~ /^(?:off|none|directsync)$/;
    } elsif (!drive_is_cdrom($drive) && !($scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow})) {
        $opts .= ",cache=none";
        $cache_direct = 1;
    }

    # io_uring with cache mode writeback or writethrough on krbd will hang...
    my $rbd_no_io_uring = $scfg && $scfg->{type} eq 'rbd' && $scfg->{krbd} && !$cache_direct;

    # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
    # sometimes, just plain disable...
    my $lvm_no_io_uring = $scfg && $scfg->{type} eq 'lvm';

    # only pick a default aio mode if the drive does not set one itself
    if (!$drive->{aio}) {
        if ($io_uring && !$rbd_no_io_uring && !$lvm_no_io_uring) {
            # io_uring supports all cache modes
            $opts .= ",aio=io_uring";
        } else {
            # aio native works only with O_DIRECT
            if ($cache_direct) {
                $opts .= ",aio=native";
            } else {
                $opts .= ",aio=threads";
            }
        }
    }

    if (!drive_is_cdrom($drive)) {
        my $detectzeroes;
        if (defined($drive->{detect_zeroes}) && !$drive->{detect_zeroes}) {
            $detectzeroes = 'off';
        } elsif ($drive->{discard}) {
            $detectzeroes = $drive->{discard} eq 'on' ? 'unmap' : 'on';
        } else {
            # This used to be our default with discard not being specified:
            $detectzeroes = 'on';
        }

        # note: 'detect-zeroes' works per blockdev and we want it to persist
        # after the alloc-track is removed, so put it on 'file' directly
        my $dz_param = $pbs_name ? "file.detect-zeroes" : "detect-zeroes";
        $opts .= ",$dz_param=$detectzeroes" if $detectzeroes;
    }

    if ($pbs_name) {
        $opts .= ",backing=$pbs_name";
        $opts .= ",auto-remove=on";
    }

    my $file_param = "file";
    if ($pbs_name) {
        # non-rbd drivers require the underlying file to be a separate block
        # node, so add a second .file indirection
        $file_param .= ".file" if !$is_rbd;
        $file_param .= ".filename";
    }
    my $pathinfo = $path ? "$file_param=$path," : '';

    return "${pathinfo}if=none,id=drive-${drive_id}${opts}";
}
1709
# Build the '-blockdev' argument for a read-only PBS block node named
# $pbs_name from the repository, snapshot and archive given in $pbs_conf.
# The keyfile is appended only if one is set.
sub print_pbs_blockdev {
    my ($pbs_conf, $pbs_name) = @_;

    my @parts = (
        "driver=pbs",
        "node-name=$pbs_name",
        "read-only=on",
        "repository=$pbs_conf->{repository}",
        "snapshot=$pbs_conf->{snapshot}",
        "archive=$pbs_conf->{archive}",
    );
    push @parts, "keyfile=$pbs_conf->{keyfile}" if $pbs_conf->{keyfile};

    return join(',', @parts);
}
1719
# Build the '-device' argument for a network device.
#
# The model 'virtio' maps to the device 'virtio-net-pci'; every other model
# name is used as the device name directly. $vmid and $conf are not used.
# Dies if a configured MTU is below 576 or above the bridge MTU.
sub print_netdevice_full {
    my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type) = @_;

    my $is_virtio = $net->{model} eq 'virtio';
    my $device = $is_virtio ? 'virtio-net-pci' : $net->{model};

    my $pciaddr = print_pci_addr("$netid", $bridges, $arch, $machine_type);
    my $tmpstr = "$device,mac=$net->{macaddr},netdev=$netid$pciaddr,id=$netid";

    if ($net->{queues} && $net->{queues} > 1 && $is_virtio) {
        # Consider we have N queues, the number of vectors needed is 2 * N + 2, i.e., one per in
        # and out of each queue plus one config interrupt and control vector queue
        my $vectors = $net->{queues} * 2 + 2;
        $tmpstr .= ",vectors=$vectors,mq=on";
    }

    if ($net->{bootindex}) {
        $tmpstr .= ",bootindex=$net->{bootindex}";
    }

    if (my $mtu = $net->{mtu}) {
        if ($is_virtio && $net->{bridge}) {
            my $bridge_mtu = PVE::Network::read_bridge_mtu($net->{bridge});
            # an MTU of 1 means "use the bridge MTU"
            if ($mtu == 1) {
                $mtu = $bridge_mtu;
            } elsif ($mtu < 576) {
                die "netdev $netid: MTU '$mtu' is smaller than the IP minimum MTU '576'\n";
            } elsif ($mtu > $bridge_mtu) {
                die "netdev $netid: MTU '$mtu' is bigger than the bridge MTU '$bridge_mtu'\n";
            }
            $tmpstr .= ",host_mtu=$mtu";
        } else {
            warn "WARN: netdev $netid: ignoring MTU '$mtu', not using VirtIO or no bridge configured.\n";
        }
    }

    if ($use_old_bios_files) {
        # PXE ROM file per device; devices not listed get no romfile option
        my %romfile_for = (
            'virtio-net-pci' => 'pxe-virtio.rom',
            'e1000' => 'pxe-e1000.rom',
            'e1000e' => 'pxe-e1000e.rom',
            'ne2k' => 'pxe-ne2k_pci.rom',
            'pcnet' => 'pxe-pcnet.rom',
            'rtl8139' => 'pxe-rtl8139.rom',
        );
        my $romfile = $romfile_for{$device};
        $tmpstr .= ",romfile=$romfile" if $romfile;
    }

    return $tmpstr;
}
1774
# Build the '-netdev' argument for a network device.
#
# Devices with a bridge get a tap backend named 'tap<vmid>i<N>' together with
# the pve-bridge up/down scripts; devices without a bridge get a 'user'
# backend.
#
# Dies if $netid is not of the form 'netN' with N below $MAX_NETS, or if the
# resulting interface name is too long. An id that did not match 'netN' used
# to slip through the range check with an empty index.
sub print_netdev_full {
    my ($vmid, $conf, $arch, $net, $netid, $hotplug) = @_;

    die "got strange net id '$netid'\n" if $netid !~ m/^net(\d+)$/;
    my $i = int($1);

    die "got strange net id '$i'\n" if $i >= ${MAX_NETS};

    my $ifname = "tap${vmid}i$i";

    # kvm uses TUNSETIFF ioctl, and that limits ifname length
    die "interface name '$ifname' is too long (max 15 character)\n"
        if length($ifname) >= 16;

    # vhost is only enabled for virtio on the native architecture
    my $vhostparam = '';
    if (is_native($arch)) {
        $vhostparam = ',vhost=on' if kernel_has_vhost_net() && $net->{model} eq 'virtio';
    }

    my $vmname = $conf->{name} || "vm$vmid";

    my $netdev = "";
    my $script = $hotplug ? "pve-bridge-hotplug" : "pve-bridge";

    if ($net->{bridge}) {
        $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/$script"
            .",downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam";
    } else {
        $netdev = "type=user,id=$netid,hostname=$vmname";
    }

    $netdev .= ",queues=$net->{queues}" if ($net->{queues} && $net->{model} eq 'virtio');

    return $netdev;
}
1812
# Maps the configured vga type to the QEMU device name.
my $vga_map = {
    'cirrus' => 'cirrus-vga',
    'std' => 'VGA',
    'vmware' => 'vmware-svga',
    'virtio' => 'virtio-vga',
};

# Build the '-device' argument for a display device.
#
# Parameters:
#   $conf            - VM config (read for 'ostype' and 'bios')
#   $vga             - parsed vga hash (type, memory)
#   $arch            - on aarch64 'virtio-vga' is replaced by 'virtio-gpu'
#   $machine_version - checked for the qxl 'max_outputs' option
#   $machine         - passed on to the PCI address helpers
#   $id              - display index; appended to the device id 'vga'
#   $qxlnum          - if true, a qxl device is used instead of the mapped type
#   $bridges         - passed on to the PCI address helpers
#
# Dies if no device type is known for $vga->{type}.
sub print_vga_device {
    my ($conf, $vga, $arch, $machine_version, $machine, $id, $qxlnum, $bridges) = @_;

    my $type = $vga_map->{$vga->{type}};
    if ($arch eq 'aarch64' && defined($type) && $type eq 'virtio-vga') {
        $type = 'virtio-gpu';
    }
    my $vgamem_mb = $vga->{memory};

    my $max_outputs = '';
    if ($qxlnum) {
        $type = $id ? 'qxl' : 'qxl-vga';

        # both alternatives are anchored now; the old pattern
        # '^(?:l\d\d)|(?:other)$' anchored each alternative on one side only
        if (!$conf->{ostype} || $conf->{ostype} =~ m/^(?:l\d\d|other)$/) {
            # set max outputs so linux can have up to 4 qxl displays with one device
            if (min_version($machine_version, 4, 1)) {
                $max_outputs = ",max_outputs=4";
            }
        }
    }

    die "no devicetype for $vga->{type}\n" if !$type;

    my $memory = "";
    if ($vgamem_mb) {
        if ($vga->{type} eq 'virtio') {
            my $bytes = PVE::Tools::convert_size($vgamem_mb, "mb" => "b");
            $memory = ",max_hostmem=$bytes";
        } elsif ($qxlnum) {
            # from https://www.spice-space.org/multiple-monitors.html
            $memory = ",vgamem_mb=$vga->{memory}";
            my $ram = $vgamem_mb * 4;
            my $vram = $vgamem_mb * 2;
            $memory .= ",ram_size_mb=$ram,vram_size_mb=$vram";
        } else {
            $memory = ",vgamem_mb=$vga->{memory}";
        }
    } elsif ($qxlnum && $id) {
        $memory = ",ram_size=67108864,vram_size=33554432";
    }

    # edid is switched off for the 'VGA' device on Windows guests unless the
    # bios is ovmf
    my $edidoff = "";
    if ($type eq 'VGA' && windows_version($conf->{ostype})) {
        $edidoff=",edid=off" if (!defined($conf->{bios}) || $conf->{bios} ne 'ovmf');
    }

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $vgaid = "vga" . ($id // '');
    my $pciaddr;
    if ($q35 && $vgaid eq 'vga') {
        # the first display uses pcie.0 bus on q35 machines
        $pciaddr = print_pcie_addr($vgaid, $bridges, $arch, $machine);
    } else {
        $pciaddr = print_pci_addr($vgaid, $bridges, $arch, $machine);
    }

    return "$type,id=${vgaid}${memory}${max_outputs}${pciaddr}${edidoff}";
}
1878
# Parse a ';'-separated list of numbers and ranges ('N' or 'N-M') into a
# list reference of [ start, end ] pairs; 'end' is undef for single numbers.
# Dies on malformed parts and on ranges whose end is below their start.
sub parse_number_sets {
    my ($set) = @_;

    my @ranges;
    for my $part (split(/;/, $set)) {
        my ($first, $last) = $part =~ /^\s*(\d+)(?:-(\d+))?\s*$/
            or die "invalid range: $part\n";
        die "invalid range: $part ($last < $first)\n" if defined($last) && $last < $first;
        push @ranges, [ $first, $last ];
    }

    return \@ranges;
}
1892
# Parse a 'numaN' property string. The 'cpus' and 'hostnodes' values, when
# present, are expanded into range lists by parse_number_sets().
sub parse_numa {
    my ($data) = @_;

    my $res = parse_property_string($numa_fmt, $data);

    for my $key (qw(cpus hostnodes)) {
        next if !defined($res->{$key});
        $res->{$key} = parse_number_sets($res->{$key});
    }

    return $res;
}
1901
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
#
# Parse a 'netN' property string. On a parse error the error is printed as a
# warning and nothing is returned. If no MAC address is given, a random one
# is generated using the 'mac_prefix' from datacenter.cfg.
sub parse_net {
    my ($data) = @_;

    my $res = eval { parse_property_string($net_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    $res->{macaddr} //= do {
        my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
        PVE::Tools::random_ether_addr($dc->{mac_prefix});
    };

    return $res;
}
1917
# ipconfigX ip=cidr,gw=ip,ip6=cidr,gw6=ip
#
# Parse an 'ipconfigN' property string. Parse errors and inconsistent
# address/gateway combinations are printed as a warning and nothing is
# returned. If neither 'ip' nor 'ip6' is set, DHCP is returned for both.
sub parse_ipconfig {
    my ($data) = @_;

    my $res = eval { parse_property_string($ipconfig_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    my ($ip, $gw, $ip6, $gw6) = $res->@{qw(ip gw ip6 gw6)};

    my $problem;
    if ($gw && !$ip) {
        $problem = 'gateway specified without specifying an IP address';
    } elsif ($gw6 && !$ip6) {
        $problem = 'IPv6 gateway specified without specifying an IPv6 address';
    } elsif ($gw && $ip eq 'dhcp') {
        $problem = 'gateway specified together with DHCP';
    } elsif ($gw6 && $ip6 !~ /^$IPV6RE/) {
        # gw6 + auto/dhcp
        $problem = "IPv6 gateway specified together with $ip6 address";
    }
    if (defined($problem)) {
        warn $problem;
        return;
    }

    return { ip => 'dhcp', ip6 => 'dhcp' } if !$ip && !$ip6;

    return $res;
}
1952
# Render a parsed net hash back into its 'netN' property string.
sub print_net {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $net_fmt);
}
1958
# Re-render every 'netN' entry of $settings in place through parse_net() and
# print_net(), so that entries without a MAC address get a generated one.
# Entries that fail to parse are left untouched.
sub add_random_macs {
    my ($settings) = @_;

    for my $opt (grep { m/^net(\d+)$/ } keys %$settings) {
        my $net = parse_net($settings->{$opt}) or next;
        $settings->{$opt} = print_net($net);
    }
}
1969
# True (1) if the owner that PVE::Storage::path() reports for $volid equals
# $vmid. Absolute paths are never owned; storage lookup errors are ignored
# and count as "not owned". Returns nothing in all other cases.
sub vm_is_volid_owner {
    my ($storecfg, $vmid, $volid) = @_;

    return if $volid =~ m|^/|;

    my ($path, $owner);
    eval { ($path, $owner) = PVE::Storage::path($storecfg, $volid); };

    return 1 if $owner && ($owner == $vmid);
    return;
}
1983
# Handle a drive that is no longer referenced by the config: cloud-init
# volumes are freed right away (errors only produce a warning), other
# non-cdrom volumes owned by this VM are recorded as unused volumes in $conf.
sub vmconfig_register_unused_drive {
    my ($storecfg, $vmid, $conf, $drive) = @_;

    my $volid = $drive->{file};

    if (drive_is_cloudinit($drive)) {
        eval { PVE::Storage::vdisk_free($storecfg, $volid) };
        warn $@ if $@;
    } elsif (!drive_is_cdrom($drive) && vm_is_volid_owner($storecfg, $vmid, $volid)) {
        PVE::QemuConfig->add_unused_volume($conf, $volid, $vmid);
    }
}
1997
# smbios: [manufacturer=str][,product=str][,version=str][,serial=str][,uuid=uuid][,sku=str][,family=str][,base64=bool]
#
# Property-string format of the 'smbios1' option. All text fields share the
# same pattern and format description and only differ in their description.
my $smbios1_fmt = do {
    my $text_field = sub {
        my ($description) = @_;
        return {
            type => 'string',
            pattern => '[A-Za-z0-9+\/]+={0,2}',
            format_description => 'Base64 encoded string',
            description => $description,
            optional => 1,
        };
    };

    my %fmt = (
        uuid => {
            type => 'string',
            pattern => '[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}',
            format_description => 'UUID',
            description => "Set SMBIOS1 UUID.",
            optional => 1,
        },
        version => $text_field->("Set SMBIOS1 version."),
        serial => $text_field->("Set SMBIOS1 serial number."),
        manufacturer => $text_field->("Set SMBIOS1 manufacturer."),
        product => $text_field->("Set SMBIOS1 product ID."),
        sku => $text_field->("Set SMBIOS1 SKU string."),
        family => $text_field->("Set SMBIOS1 family string."),
        base64 => {
            type => 'boolean',
            description => 'Flag to indicate that the SMBIOS values are base64 encoded',
            optional => 1,
        },
    );

    \%fmt;
};
2055
# Parse an 'smbios1' property string according to $smbios1_fmt.
#
# Returns the result of parse_property_string(). If parsing dies, the error is emitted as
# a warning and undef is returned instead.
sub parse_smbios1 {
    my ($data) = @_;

    my $parsed = eval { parse_property_string($smbios1_fmt, $data) };
    if ($@) {
        warn $@;
    }

    return $parsed;
}
2063
# Serialize a parsed smbios1 hash back into a property string, using $smbios1_fmt as schema.
sub print_smbios1 {
    my $smbios1 = shift;

    return PVE::JSONSchema::print_property_string($smbios1, $smbios1_fmt);
}

# make the schema available under the format name 'pve-qm-smbios1'
PVE::JSONSchema::register_format('pve-qm-smbios1', $smbios1_fmt);
2070
# Parse a 'watchdog' property string according to $watchdog_fmt.
#
# Returns nothing for an unset/false $value. Otherwise returns the parsed result, or undef
# (after emitting a warning) if parsing died.
sub parse_watchdog {
    my ($value) = @_;

    return unless $value;

    my $watchdog = eval { parse_property_string($watchdog_fmt, $value) };
    if ($@) {
        warn $@;
    }

    return $watchdog;
}
2080
# Parse the 'agent' option of $conf according to $agent_fmt.
#
# Always returns a hash reference: an empty one if the option is not set, could not be
# parsed (a warning is emitted in that case) or if the agent is not enabled.
sub parse_guest_agent {
    my ($conf) = @_;

    my $agent_option = $conf->{agent};
    return {} unless defined($agent_option);

    my $agent = eval { parse_property_string($agent_fmt, $agent_option) };
    warn $@ if $@;

    # a disabled agent makes all other potentially set properties irrelevant
    return $agent->{enabled} ? $agent : {};
}
2093
# Look up a single property ($key) of the guest agent configuration in $conf.
#
# Returns undef if no 'agent' option is set; otherwise the value found in the hash returned
# by parse_guest_agent().
sub get_qga_key {
    my ($conf, $key) = @_;

    if (!defined($conf->{agent})) {
        return undef;
    }

    my $agent_conf = parse_guest_agent($conf);
    return $agent_conf->{$key};
}
2101
# Parse a 'vga' property string according to $vga_fmt.
#
# Returns an empty hash reference for an unset/false $value. Otherwise returns the parsed
# result, or undef (after emitting a warning) if parsing died.
sub parse_vga {
    my ($value) = @_;

    return {} unless $value;

    my $vga = eval { parse_property_string($vga_fmt, $value) };
    if ($@) {
        warn $@;
    }

    return $vga;
}
2110
# Parse an 'rng' property string according to $rng_fmt.
#
# Returns nothing for an unset/false $value. Otherwise returns the parsed result, or undef
# (after emitting a warning) if parsing died.
sub parse_rng {
    my ($value) = @_;

    return unless $value;

    my $rng = eval { parse_property_string($rng_fmt, $value) };
    if ($@) {
        warn $@;
    }

    return $rng;
}
2120
# Parse a 'meta' property string according to $meta_info_fmt.
#
# Returns nothing for an unset/false $value. Otherwise returns the parsed result, or undef
# (after emitting a warning) if parsing died.
sub parse_meta_info {
    my ($value) = @_;

    return unless $value;

    my $meta = eval { parse_property_string($meta_info_fmt, $value) };
    if ($@) {
        warn $@;
    }

    return $meta;
}
2130
# Build the 'meta' property string for a freshly created guest.
#
# It records the value of kvm_user_version() as 'creation-qemu' and the current epoch
# (as string) as 'ctime'.
sub new_meta_info_string {
    my () = @_; # for now do not allow to override any value

    my $meta = {
        'creation-qemu' => kvm_user_version(),
        ctime => "". int(time()),
    };

    return PVE::JSONSchema::print_property_string($meta, $meta_info_fmt);
}
2142
# Compute extra QEMU arguments for guests that always run the latest machine version but
# were created before a machine version transition that changed the virtual hardware in a
# way that would otherwise require a change inside the guest OS (machine versions are not
# pinned for non-windows OS types).
#
# Parameters:
#   $conf         - guest configuration ('machine', 'meta' and 'ostype' are evaluated)
#   $forcemachine - machine type forced by the caller, if any
#   $kvmver       - version of the QEMU binary in use
#
# Returns an array reference with additional command line arguments, or nothing if no
# fixup applies.
sub qemu_created_version_fixups {
    my ($conf, $forcemachine, $kvmver) = @_;

    my $meta = parse_meta_info($conf->{meta}) // {};
    my $forced_vers = PVE::QemuServer::Machine::extract_version($forcemachine);

    # only non-versioned machine types follow the latest machine version
    my $machine = $conf->{machine};
    return if defined($machine) && $machine !~ m/^(?:pc|q35|virt)$/;

    # guests created with 6.1 or newer already got the new behavior from the start
    my $creation_qemu = $meta->{'creation-qemu'};
    return if defined($creation_qemu) && min_version($creation_qemu, 6, 1);

    # handle snapshot-rollback/migrations
    return if $forced_vers && !min_version($forced_vers, 6, 1);

    # only need to apply the change since 6.1
    return if !min_version($kvmver, 6, 1);

    return if !PVE::QemuServer::Machine::machine_type_is_q35($conf);
    return if !$conf->{ostype} || $conf->{ostype} ne 'l26';

    # this changed to default-on in Q 6.1 for q35 machines, it will mess with PCI slot view
    # and thus with the predictable interface naming of systemd
    return ['-global', 'ICH9-LPC.acpi-pci-hotplug-with-bridge-support=off'];
}
2167
PVE::JSONSchema::register_format('pve-qm-usb-device', \&verify_usb_device);

# Format verifier for 'pve-qm-usb-device'.
#
# Returns $value if parse_usb_device() accepts it. Otherwise returns nothing when $noerr
# is set, and dies if it is not.
sub verify_usb_device {
    my ($value, $noerr) = @_;

    if (!parse_usb_device($value)) {
        return if $noerr;
        die "unable to parse usb device\n";
    }

    return $value;
}
2178
# Add the JSON schema properties used by the create and set functions to $prop.
#
# Every entry of $confdesc is copied (by reference) into $prop, except for a fixed set of
# options that is skipped. Returns $prop.
sub json_config_properties {
    my $prop = shift;

    my %skip_opt = map { $_ => 1 } qw(parent snaptime vmstate runningmachine runningcpu meta);

    for my $opt (grep { !$skip_opt{$_} } keys %$confdesc) {
        $prop->{$opt} = $confdesc->{$opt};
    }

    return $prop;
}
2199
# Return a deep copy of $confdesc_cloudinit (used to generate documentation), so callers
# can modify the result without affecting the schema itself.
sub cloudinit_config_properties {
    my $properties = dclone($confdesc_cloudinit);
    return $properties;
}
2205
# Validate and normalize the $value of configuration option $key based on the type
# declared for it in $confdesc.
#
# Returns the normalized value:
#   boolean - 1 or 0 ('1', 'on', 'yes', 'true' / '0', 'off', 'no', 'false', case-insensitive)
#   integer - int() of a string of digits
#   number  - the value itself, digits with an optional fractional part
#   string  - the value itself after check_format() if the option declares a format,
#             otherwise the value with one pair of enclosing double quotes removed
#
# Dies for unknown options, undefined values, values containing CR/LF, failed type checks
# and unsupported schema types.
sub check_type {
    my ($key, $value) = @_;

    my $schema = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$schema;

    my $type = $schema->{type};

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'string') {
        my $fmt = $schema->{format};
        if ($fmt) {
            PVE::JSONSchema::check_format($fmt, $value);
            return $value;
        }
        $value =~ s/^\"(.*)\"$/$1/;
        return $value;
    }

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    die "internal error"
}
2242
# Destroy VM $vmid: free the volumes it owns and remove (or replace) its configuration.
#
# Parameters:
#   $storecfg           - storage configuration, passed on to PVE::Storage
#   $vmid               - ID of the VM
#   $skiplock           - if true, the configuration lock is not checked
#   $replacement_conf   - if defined, it is written as new configuration for $vmid instead
#                         of destroying the configuration
#   $purge_unreferenced - if true, all volumes listed by vdisk_list() for $vmid are freed
#                         too, even if the configuration does not reference them
#
# Dies if a base volume of a template is still used by a linked clone. Failing to free a
# single volume only results in a warning.
sub destroy_vm {
    my ($storecfg, $vmid, $skiplock, $replacement_conf, $purge_unreferenced) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    PVE::QemuConfig->check_lock($conf) unless $skiplock;

    if ($conf->{template}) {
        # check if any base image is still used by a linked clone
        my $assert_base_unused = sub {
            my ($ds, $drive) = @_;
            return if drive_is_cdrom($drive);

            my $volid = $drive->{file};
            return if !$volid || $volid =~ m|^/|;

            die "base volume '$volid' is still in use by linked cloned\n"
                if PVE::Storage::volume_is_base_and_used($storecfg, $volid);

        };
        PVE::QemuConfig->foreach_volume_full($conf, { include_unused => 1 }, $assert_base_unused);
    }

    # volumes already handled, so that each one is freed at most once
    my $handled = {};
    my $free_owned_volume = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;
        return if $handled->{$volid};

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        $handled->{$volid} = 1;
        eval { PVE::Storage::vdisk_free($storecfg, $volid) };
        warn "Could not remove disk '$volid', check manually: $@" if $@;
    };

    # only remove disks owned by this VM (referenced in the config)
    my $volume_opts = {
        include_unused => 1,
        extra_keys => ['vmstate'],
    };

    # 1. current configuration
    PVE::QemuConfig->foreach_volume_full($conf, $volume_opts, $free_owned_volume);

    # 2. saved VM states of the snapshots
    for my $snapshot (values %{$conf->{snapshots}}) {
        my $vmstate = $snapshot->{vmstate};
        next if !defined($vmstate);

        my $drive = PVE::QemuConfig->parse_volume('vmstate', $vmstate, 1);
        next if !defined($drive);

        $free_owned_volume->('vmstate', $drive);
    }

    # 3. pending changes
    PVE::QemuConfig->foreach_volume_full($conf->{pending}, $volume_opts, $free_owned_volume);

    if ($purge_unreferenced) { # also remove unreferenced disk
        my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid, undef, 'images');
        PVE::Storage::foreach_volid($vmdisks, sub {
            my ($volid, $sid, $volname, $d) = @_;
            eval { PVE::Storage::vdisk_free($storecfg, $volid) };
            warn $@ if $@;
        });
    }

    if (defined($replacement_conf)) {
        PVE::QemuConfig->write_config($vmid, $replacement_conf);
    } else {
        PVE::QemuConfig->destroy_config($vmid);
    }
}
2313
# Parse the raw content of a VM configuration file.
#
# Parameters:
#   $filename - path of the config file; must end in '/qemu-server/<vmid>.conf', the VMID
#               is taken from it
#   $raw      - file content; nothing is returned if it is undefined
#
# Returns a hash reference holding the options of the current configuration plus:
#   digest    - SHA1 hex digest of $raw
#   pending   - options of the [PENDING] section
#   snapshots - one hash per snapshot section, keyed by section name
#
# Lines starting with '#' (and 'description:' lines) are collected as description of the
# section they appear in. Lines or values that cannot be parsed are skipped with a warning.
# Dies if $filename does not have the expected form.
sub parse_vm_config {
    my ($filename, $raw) = @_;

    return if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
        pending => {},
    };

    $filename =~ m|/qemu-server/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    my $conf = $res; # hash of the section currently being parsed
    my $descr;
    my $section = '';

    # store the description collected so far in the current section
    my $store_description = sub {
        return if !defined($descr);
        $descr =~ s/\s+$//;
        $conf->{description} = $descr;
    };

    foreach my $line (split(/\n/, $raw)) {
        next if $line =~ m/^\s*$/;

        if ($line =~ m/^\[PENDING\]\s*$/i) {
            $section = 'pending';
            $store_description->();
            $descr = undef;
            $conf = $res->{$section} = {};
            next;

        } elsif ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            $section = $1;
            $store_description->();
            $descr = undef;
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)\s*$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # anchored at the start of the line: this branch is tested before 'args', whose
            # free-form value could otherwise be mistaken for a snapstate entry
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            my $key = $1;
            my $value = $2;
            $conf->{$key} = $value;
        } elsif ($line =~ m/^delete:\s*(.*\S)\s*$/) {
            my $value = $1;
            if ($section eq 'pending') {
                $conf->{delete} = $value; # we parse this later
            } else {
                warn "vm $vmid - propertry 'delete' is only allowed in [PENDING]\n";
            }
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(.+?)\s*$/) {
            my $key = $1;
            my $value = $2;
            eval { $value = check_type($key, $value); };
            if ($@) {
                warn "vm $vmid - unable to parse value of '$key' - $@";
            } else {
                # legacy alias
                $key = 'ide2' if $key eq 'cdrom';
                my $fmt = $confdesc->{$key}->{format};
                if ($fmt && $fmt =~ /^pve-qm-(?:ide|scsi|virtio|sata)$/) {
                    # normalize the volume reference of disk drives
                    my $v = parse_drive($key, $value);
                    if (my $volid = filename_to_volume_id($vmid, $v->{file}, $v->{media})) {
                        $v->{file} = $volid;
                        $value = print_drive($v);
                    } else {
                        warn "vm $vmid - unable to parse value of '$key'\n";
                        next;
                    }
                }

                $conf->{$key} = $value;
            }
        } else {
            warn "vm $vmid - unable to parse config: $line\n";
        }
    }

    # description of the last section
    $store_description->();

    delete $res->{snapstate}; # just to be sure

    return $res;
}
2416
# Serialize the VM configuration $conf into the raw config file format.
#
# $conf is normalized in place first: the legacy 'cdrom' and 'smp' options are converted,
# all values are passed through check_type() and 'unusedN' entries referring to a volume
# that is in use by a drive again are dropped.
#
# Returns the raw text: the current configuration, followed by a [PENDING] section (only
# if there are pending changes) and one section per snapshot, sorted by name.
#
# Dies if 'cdrom' and 'ide2' are both set, if a value fails check_type(), if 'delete' is
# used outside of the pending section or if a snapshot is named 'pending'.
sub write_vm_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    if ($conf->{cdrom}) {
        die "option ide2 conflicts with cdrom\n" if $conf->{ide2};
        $conf->{ide2} = delete $conf->{cdrom};
    }

    # we do not use 'smp' any longer
    if ($conf->{sockets}) {
        delete $conf->{smp};
    } elsif ($conf->{smp}) {
        $conf->{sockets} = $conf->{smp};
        delete $conf->{cores};
        delete $conf->{smp};
    }

    # volumes referenced by a drive of the current or pending configuration
    my $used_volids = {};

    # keys that are structural and thus never type-checked
    my %structural_key = map { $_ => 1 } qw(digest description snapshots snapstate pending);

    my $cleanup_config = sub {
        my ($cref, $pending, $snapname) = @_;

        foreach my $key (keys %$cref) {
            next if $structural_key{$key};

            if ($key eq 'delete') {
                die "propertry 'delete' is only allowed in [PENDING]\n"
                    if !$pending;
                # fixme: check syntax?
                next;
            }

            my $value = $cref->{$key};
            eval { $value = check_type($key, $value); };
            die "unable to parse value of '$key' - $@" if $@;

            $cref->{$key} = $value;

            next if $snapname || !is_valid_drivename($key);

            my $drive = parse_drive($key, $value);
            $used_volids->{$drive->{file}} = 1 if $drive && $drive->{file};
        }
    };

    $cleanup_config->($conf);

    $cleanup_config->($conf->{pending}, 1);

    foreach my $snapname (keys %{$conf->{snapshots}}) {
        die "internal error: snapshot name '$snapname' is forbidden" if lc($snapname) eq 'pending';
        $cleanup_config->($conf->{snapshots}->{$snapname}, undef, $snapname);
    }

    # remove 'unusedX' settings if we re-add a volume
    foreach my $key (grep { m/^unused/ } keys %$conf) {
        delete $conf->{$key} if $used_volids->{$conf->{$key}};
    }

    my $generate_raw_config = sub {
        my ($conf, $pending) = @_;

        my $raw = '';

        # add description as comment to top of file
        my $descr = $conf->{description};
        if (defined($descr)) {
            if ($descr) {
                $raw .= '#' . PVE::Tools::encode_text($_) . "\n" for split(/\n/, $descr);
            } elsif ($pending) {
                $raw .= "#\n";
            }
        }

        foreach my $key (sort keys %$conf) {
            next if $key =~ /^(digest|description|pending|snapshots)$/;
            $raw .= "$key: $conf->{$key}\n";
        }
        return $raw;
    };

    my $raw = $generate_raw_config->($conf);

    if (scalar(keys %{$conf->{pending}})) {
        $raw .= "\n[PENDING]\n";
        $raw .= $generate_raw_config->($conf->{pending}, 1);
    }

    foreach my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n";
        $raw .= $generate_raw_config->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
2518
# Collect the static default values declared in the JSON schema ($confdesc).
#
# Returns a hash reference mapping each option that declares a default to that default.
sub load_defaults {

    my $defaults = {};

    for my $key (keys %$confdesc) {
        my $default = $confdesc->{$key}->{default};
        next if !defined($default);

        $defaults->{$key} = $default;
    }

    return $defaults;
}
2532
# List the guests of type 'qemu' that the cluster VM list assigns to the local node.
#
# Returns a hash reference keyed by VMID, each entry being { exists => 1 }.
sub config_list {
    my $vmlist = PVE::Cluster::get_vmlist();
    my $res = {};
    return $res unless $vmlist && $vmlist->{ids};

    my $ids = $vmlist->{ids};
    my $nodename = nodename();

    for my $vmid (keys %$ids) {
        my $entry = $ids->{$vmid};
        next unless $entry->{node} && $entry->{node} eq $nodename;
        next unless $entry->{type} && $entry->{type} eq 'qemu';

        $res->{$vmid}->{exists} = 1;
    }

    return $res;
}
2548
# Test if a VM uses local resources (to prevent migration).
#
# Parameters:
#   $conf  - VM configuration
#   $noerr - if true, do not die when local resources are found
#
# Returns an array reference with the names of all options referring to a local resource.
# Dies if there are any and $noerr is not set.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    # 'hostusb' and 'hostpci' are the old syntax
    my @loc_res = grep { $conf->{$_} } qw(hostusb hostpci ivshmem);

    for my $k (keys %$conf) {
        my $value = $conf->{$k};

        # USB devices redirected via spice are not bound to the host
        next if $k =~ m/^usb/ && ($value =~ m/^spice(?![^,])/);
        # sockets are safe: they will be recreated on the target side post-migrate
        next if $k =~ m/^serial/ && ($value eq 'socket');
        next if $k !~ m/^(usb|hostpci|serial|parallel)\d+$/;

        push @loc_res, $k;
    }

    if (@loc_res && !$noerr) {
        die "VM uses local resources\n";
    }

    return \@loc_res;
}
2571
# Check if the storages used by the volumes of $conf are available on the local node and
# on $node (used by migrate).
#
# Volumes whose ID has no storage part are ignored. Dies (through storage_check_enabled())
# if a storage is not enabled on one of the two nodes, or if the local storage
# configuration does not list the content type of a volume.
sub check_storage_availability {
    my ($storecfg, $conf, $node) = @_;

    my $check_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return unless $volid;

        my ($sid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return unless $sid;

        # check if storage is available on both nodes
        my $scfg = PVE::Storage::storage_check_enabled($storecfg, $sid);
        PVE::Storage::storage_check_enabled($storecfg, $sid, $node);

        my ($vtype) = PVE::Storage::parse_volname($storecfg, $volid);

        if (!$scfg->{content}->{$vtype}) {
            die "$volid: content type '$vtype' is not available on storage '$sid'\n";
        }
    };

    PVE::QemuConfig->foreach_volume($conf, $check_volume);
}
2595
# List the nodes where all VM images are available (used by has_feature API).
#
# Starts with all cluster nodes and, for each volume of $conf that lives on a storage,
# removes the nodes that cannot access that storage:
#   - a disabled storage removes all nodes
#   - a storage restricted to certain nodes removes all others
#   - a non-shared storage removes all nodes but the local one
#
# Returns a hash reference whose keys are the remaining node names.
sub shared_nodes {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => 1 } @$nodelist };
    my $nodename = nodename();

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        if ($scfg->{disable}) {
            $nodehash = {};
            return;
        }

        my @unavailable;
        if (my $avail = $scfg->{nodes}) {
            @unavailable = grep { !$avail->{$_} } keys %$nodehash;
        } elsif (!$scfg->{shared}) {
            @unavailable = grep { $_ ne $nodename } keys %$nodehash;
        }
        delete @{$nodehash}{@unavailable};
    });

    return $nodehash
}
2629
# Determine, for every cluster node, which storages used by the volumes of $conf are not
# available there (because the storage is disabled or restricted to other nodes).
#
# Returns a hash reference keyed by node name. The entry of a node with missing storages
# has the key 'unavailable_storages', holding a sorted array reference of storage IDs;
# the entry of all other nodes is an empty hash.
sub check_local_storage_availability {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => {} } @$nodelist };

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        # nodes that cannot use this storage
        my @affected;
        if ($scfg->{disable}) {
            @affected = keys %$nodehash;
        } elsif (my $avail = $scfg->{nodes}) {
            @affected = grep { !$avail->{$_} } keys %$nodehash;
        }

        $nodehash->{$_}->{unavailable_storages}->{$storeid} = 1 for @affected;
    });

    # turn the collected sets into sorted lists
    for my $node_info (values %$nodehash) {
        my $unavail = $node_info->{unavailable_storages};
        next if !$unavail;

        $node_info->{unavailable_storages} = [ sort keys %$unavail ];
    }

    return $nodehash
}
2668
# Compat only, use assert_config_exists_on_node and vm_running_locally where possible
#
# Unless $nocheck is set, first asserts that the config of $vmid exists on $node.
# Returns the result of vm_running_locally() for $vmid.
sub check_running {
    my ($vmid, $nocheck, $node) = @_;

    if (!$nocheck) {
        PVE::QemuConfig::assert_config_exists_on_node($vmid, $node);
    }

    return PVE::QemuServer::Helpers::vm_running_locally($vmid);
}
2676
# List the local VMs (see config_list()) and add the 'pid' of those that are running.
#
# Only VMs with a '<vmid>.pid' file in the runtime directory are checked with
# check_running(). If that directory cannot be opened, the plain list is returned.
sub vzlist {

    my $vzlist = config_list();

    my $dir = IO::Dir->new($PVE::QemuServer::Helpers::var_run_tmpdir);
    return $vzlist if !$dir;

    while (defined(my $entry = $dir->read)) {
        next if $entry !~ m/^(\d+)\.pid$/;
        my $vmid = $1;

        next if !defined($vzlist->{$vmid});

        my $pid = check_running($vmid);
        $vzlist->{$vmid}->{pid} = $pid if $pid;
    }

    return $vzlist;
}
2694
# JSON schema properties describing the status entry of a single VM.
our $vmstatus_return_properties = {
    # identification and configuration
    vmid => get_standard_option('pve-vmid'),
    name => {
        description => "VM name.",
        type => 'string',
        optional => 1,
    },
    tags => {
        description => "The current configured tags, if any",
        type => 'string',
        optional => 1,
    },
    lock => {
        description => "The current config lock, if any.",
        type => 'string',
        optional => 1,
    },

    # configured resources
    cpus => {
        description => "Maximum usable CPUs.",
        type => 'number',
        optional => 1,
    },
    maxmem => {
        description => "Maximum memory in bytes.",
        type => 'integer',
        optional => 1,
        renderer => 'bytes',
    },
    maxdisk => {
        description => "Root disk size in bytes.",
        type => 'integer',
        optional => 1,
        renderer => 'bytes',
    },

    # state of the QEMU process
    status => {
        description => "Qemu process status.",
        type => 'string',
        enum => ['stopped', 'running'],
    },
    qmpstatus => {
        description => "Qemu QMP agent status.",
        type => 'string',
        optional => 1,
    },
    pid => {
        description => "PID of running qemu process.",
        type => 'integer',
        optional => 1,
    },
    uptime => {
        description => "Uptime.",
        type => 'integer',
        optional => 1,
        renderer => 'duration',
    },
    'running-machine' => {
        description => "The currently running machine type (if running).",
        type => 'string',
        optional => 1,
    },
    'running-qemu' => {
        description => "The currently running QEMU version (if running).",
        type => 'string',
        optional => 1,
    },
};
2761
# Per-PID cache of the last CPU usage sample ({ time, used, cpu }) taken by vmstatus().
# It persists between calls so that CPU usage can be computed as a delta.
2762my $last_proc_pid_stat;
2763
2764# get VM status information
2765# This must be fast and should not block ($full == false)
2766# We only query KVM using QMP if $full == true (this can be slow)
#
# Parameters:
#   $opt_vmid - if set, only the VM with this ID is included in the result
#   $full     - if true, additionally query the running VMs via QMP
#
# Returns a hash reference keyed by VMID. Each entry holds the values derived from the
# configuration and from /proc; with $full it may also hold per-NIC counters, block
# statistics, balloon information, the running machine/QEMU version, 'qmpstatus' and
# 'proxmox-support'.
2767sub vmstatus {
2768 my ($opt_vmid, $full) = @_;
2769
2770 my $res = {};
2771
2772 my $storecfg = PVE::Storage::config();
2773
2774 my $list = vzlist();
2775 my $defaults = load_defaults();
2776
2777 my ($uptime) = PVE::ProcFSTools::read_proc_uptime(1);
2778
2779 my $cpucount = $cpuinfo->{cpus} || 1;
2780
 # Step 1: create one entry per VM from its configuration, counters start at zero
2781 foreach my $vmid (keys %$list) {
2782 next if $opt_vmid && ($vmid ne $opt_vmid);
2783
2784 my $conf = PVE::QemuConfig->load_config($vmid);
2785
2786 my $d = { vmid => int($vmid) };
2787 $d->{pid} = int($list->{$vmid}->{pid}) if $list->{$vmid}->{pid};
2788
2789 # fixme: better status?
2790 $d->{status} = $list->{$vmid}->{pid} ? 'running' : 'stopped';
2791
2792 my $size = PVE::QemuServer::Drive::bootdisk_size($storecfg, $conf);
2793 if (defined($size)) {
2794 $d->{disk} = 0; # no info available
2795 $d->{maxdisk} = $size;
2796 } else {
2797 $d->{disk} = 0;
2798 $d->{maxdisk} = 0;
2799 }
2800
 # sockets * cores, capped at the host CPU count; an explicit 'vcpus' setting overrides
 # the result, including the cap
2801 $d->{cpus} = ($conf->{sockets} || $defaults->{sockets})
2802 * ($conf->{cores} || $defaults->{cores});
2803 $d->{cpus} = $cpucount if $d->{cpus} > $cpucount;
2804 $d->{cpus} = $conf->{vcpus} if $conf->{vcpus};
2805
2806 $d->{name} = $conf->{name} || "VM $vmid";
 # NOTE(review): 'memory' is multiplied by 1024*1024 to get bytes, so it is presumably
 # configured in MiB - confirm against the schema
2807 $d->{maxmem} = $conf->{memory} ? $conf->{memory}*(1024*1024)
2808 : $defaults->{memory}*(1024*1024);
2809
2810 if ($conf->{balloon}) {
2811 $d->{balloon_min} = $conf->{balloon}*(1024*1024);
2812 $d->{shares} = defined($conf->{shares}) ? $conf->{shares}
2813 : $defaults->{shares};
2814 }
2815
2816 $d->{uptime} = 0;
2817 $d->{cpu} = 0;
2818 $d->{mem} = 0;
2819
2820 $d->{netout} = 0;
2821 $d->{netin} = 0;
2822
2823 $d->{diskread} = 0;
2824 $d->{diskwrite} = 0;
2825
2826 $d->{template} = 1 if PVE::QemuConfig->is_template($conf);
2827
2828 $d->{serial} = 1 if conf_has_serial($conf);
2829 $d->{lock} = $conf->{lock} if $conf->{lock};
2830 $d->{tags} = $conf->{tags} if defined($conf->{tags});
2831
2832 $res->{$vmid} = $d;
2833 }
2834
 # Step 2: sum up the traffic of all tap devices; the VMID is taken from the device name
 # ('tap<vmid>i...'). The device's 'receive' counter is accounted as 'netout' and its
 # 'transmit' counter as 'netin' - presumably because the counters are from the host's
 # point of view.
2835 my $netdev = PVE::ProcFSTools::read_proc_net_dev();
2836 foreach my $dev (keys %$netdev) {
2837 next if $dev !~ m/^tap([1-9]\d*)i/;
2838 my $vmid = $1;
2839 my $d = $res->{$vmid};
2840 next if !$d;
2841
2842 $d->{netout} += $netdev->{$dev}->{receive};
2843 $d->{netin} += $netdev->{$dev}->{transmit};
2844
2845 if ($full) {
2846 $d->{nics}->{$dev}->{netout} = int($netdev->{$dev}->{receive});
2847 $d->{nics}->{$dev}->{netin} = int($netdev->{$dev}->{transmit});
2848 }
2849
2850 }
2851
2852 my $ctime = gettimeofday;
2853
 # Step 3: uptime, memory and CPU usage of running VMs from /proc/<pid>/stat
2854 foreach my $vmid (keys %$list) {
2855
2856 my $d = $res->{$vmid};
2857 my $pid = $d->{pid};
2858 next if !$pid;
2859
2860 my $pstat = PVE::ProcFSTools::read_proc_pid_stat($pid);
2861 next if !$pstat; # not running
2862
2863 my $used = $pstat->{utime} + $pstat->{stime};
2864
2865 $d->{uptime} = int(($uptime - $pstat->{starttime})/$cpuinfo->{user_hz});
2866
 # estimate: scale the configured memory by the rss/vsize ratio of the process
2867 if ($pstat->{vsize}) {
2868 $d->{mem} = int(($pstat->{rss}/$pstat->{vsize})*$d->{maxmem});
2869 }
2870
 # first sample for this PID: only remember it, there is nothing to compare against yet
2871 my $old = $last_proc_pid_stat->{$pid};
2872 if (!$old) {
2873 $last_proc_pid_stat->{$pid} = {
2874 time => $ctime,
2875 used => $used,
2876 cpu => 0,
2877 };
2878 next;
2879 }
2880
2881 my $dtime = ($ctime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};
2882
 # only take a new sample if enough time has passed, else report the cached value
2883 if ($dtime > 1000) {
2884 my $dutime = $used - $old->{used};
2885
2886 $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
2887 $last_proc_pid_stat->{$pid} = {
2888 time => $ctime,
2889 used => $used,
2890 cpu => $d->{cpu},
2891 };
2892 } else {
2893 $d->{cpu} = $old->{cpu};
2894 }
2895 }
2896
2897 return $res if !$full;
2898
 # Step 4 ($full only): query the running VMs via QMP; the callbacks below store the
 # responses in $res
2899 my $qmpclient = PVE::QMPClient->new();
2900
2901 my $ballooncb = sub {
2902 my ($vmid, $resp) = @_;
2903
2904 my $info = $resp->{'return'};
2905 return if !$info->{max_mem};
2906
2907 my $d = $res->{$vmid};
2908
2909 # use memory assigned to VM
2910 $d->{maxmem} = $info->{max_mem};
2911 $d->{balloon} = $info->{actual};
2912
2913 if (defined($info->{total_mem}) && defined($info->{free_mem})) {
2914 $d->{mem} = $info->{total_mem} - $info->{free_mem};
2915 $d->{freemem} = $info->{free_mem};
2916 }
2917
2918 $d->{ballooninfo} = $info;
2919 };
2920
2921 my $blockstatscb = sub {
2922 my ($vmid, $resp) = @_;
2923 my $data = $resp->{'return'} || [];
2924 my $totalrdbytes = 0;
2925 my $totalwrbytes = 0;
2926
2927 for my $blockstat (@$data) {
2928 $totalrdbytes = $totalrdbytes + $blockstat->{stats}->{rd_bytes};
2929 $totalwrbytes = $totalwrbytes + $blockstat->{stats}->{wr_bytes};
2930
2931 $blockstat->{device} =~ s/drive-//;
2932 $res->{$vmid}->{blockstat}->{$blockstat->{device}} = $blockstat->{stats};
2933 }
2934 $res->{$vmid}->{diskread} = $totalrdbytes;
2935 $res->{$vmid}->{diskwrite} = $totalwrbytes;
2936 };
2937
2938 my $machinecb = sub {
2939 my ($vmid, $resp) = @_;
2940 my $data = $resp->{'return'} || [];
2941
2942 $res->{$vmid}->{'running-machine'} =
2943 PVE::QemuServer::Machine::current_from_query_machines($data);
2944 };
2945
2946 my $versioncb = sub {
2947 my ($vmid, $resp) = @_;
2948 my $data = $resp->{'return'} // {};
2949 my $version = 'unknown';
2950
2951 if (my $v = $data->{qemu}) {
2952 $version = $v->{major} . "." . $v->{minor} . "." . $v->{micro};
2953 }
2954
2955 $res->{$vmid}->{'running-qemu'} = $version;
2956 };
2957
 # the 'query-status' response triggers queueing of all further queries for that VM
2958 my $statuscb = sub {
2959 my ($vmid, $resp) = @_;
2960
2961 $qmpclient->queue_cmd($vmid, $blockstatscb, 'query-blockstats');
2962 $qmpclient->queue_cmd($vmid, $machinecb, 'query-machines');
2963 $qmpclient->queue_cmd($vmid, $versioncb, 'query-version');
2964 # this fails if ballon driver is not loaded, so this must be
2965 # the last commnand (following command are aborted if this fails).
2966 $qmpclient->queue_cmd($vmid, $ballooncb, 'query-balloon');
2967
2968 my $status = 'unknown';
2969 if (!defined($status = $resp->{'return'}->{status})) {
2970 warn "unable to get VM status\n";
2971 return;
2972 }
2973
2974 $res->{$vmid}->{qmpstatus} = $resp->{'return'}->{status};
2975 };
2976
2977 foreach my $vmid (keys %$list) {
2978 next if $opt_vmid && ($vmid ne $opt_vmid);
2979 next if !$res->{$vmid}->{pid}; # not running
2980 $qmpclient->queue_cmd($vmid, $statuscb, 'query-status');
2981 }
2982
2983 $qmpclient->queue_execute(undef, 2);
2984
2985 foreach my $vmid (keys %$list) {
2986 next if $opt_vmid && ($vmid ne $opt_vmid);
2987 next if !$res->{$vmid}->{pid}; #not running
2988
2989 # we can't use the $qmpclient since it might have already aborted on
2990 # 'query-balloon', but this might also fail for older versions...
2991 my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };
2992 $res->{$vmid}->{'proxmox-support'} = $qemu_support // {};
2993 }
2994
 # fall back to the process status for VMs without a QMP status
2995 foreach my $vmid (keys %$list) {
2996 next if $opt_vmid && ($vmid ne $opt_vmid);
2997 $res->{$vmid}->{qmpstatus} = $res->{$vmid}->{status} if !$res->{$vmid}->{qmpstatus};
2998 }
2999
3000 return $res;
3001}
3002
# Check if any of the 'serial0' .. 'serial<$MAX_SERIAL_PORTS - 1>' options is set in $conf.
#
# Returns 1 if so, 0 otherwise.
sub conf_has_serial {
    my ($conf) = @_;

    for my $i (0 .. $MAX_SERIAL_PORTS - 1) {
        return 1 if $conf->{"serial$i"};
    }

    return 0;
}
3014
# Get the settings of audio device number $id (default 0) from $conf.
#
# Returns nothing if the 'audio$id' option is not set. Otherwise returns a hash reference:
#   dev        - configured device model
#   dev_id     - ID of the device ("audiodev$id")
#   backend    - configured driver, 'spice' if none is set
#   backend_id - ID of the backend ("<driver>-backend$id")
sub conf_has_audio {
    my ($conf, $id) = @_;

    $id //= 0;

    my $audio = $conf->{"audio$id"};
    return unless defined($audio);

    my $audioproperties = parse_property_string($audio_fmt, $audio);
    my $driver = $audioproperties->{driver} // 'spice';

    my $info = {
        dev => $audioproperties->{device},
        dev_id => "audiodev$id",
        backend => $driver,
        backend_id => "$driver-backend${id}",
    };
    return $info;
}
3032
# Generate the QEMU command line arguments for an audio device.
#
# Parameters:
#   $audio           - audio settings as returned by conf_has_audio()
#   $audiopciaddr    - PCI address suffix, appended verbatim to the device definition
#   $machine_version - machine version; the ',audiodev=' reference to the backend is only
#                      added if min_version() reports at least 4.2
#
# Returns an array reference with the '-device' argument(s) followed by the '-audiodev'
# backend definition. Dies for unsupported device models.
sub audio_devs {
    my ($audio, $audiopciaddr, $machine_version) = @_;

    my $id = $audio->{dev_id};
    my $model = $audio->{dev};

    my $audiodev = min_version($machine_version, 4, 2) ? ",audiodev=$audio->{backend_id}" : "";

    my @devs;
    if ($model eq 'AC97') {
        @devs = ('-device', "AC97,id=${id}${audiopciaddr}$audiodev");
    } elsif ($model =~ /intel\-hda$/) {
        # the controller itself plus the two codecs attached to its bus
        @devs = (
            '-device', "$model,id=${id}${audiopciaddr}",
            '-device', "hda-micro,id=${id}-codec0,bus=${id}.0,cad=0$audiodev",
            '-device', "hda-duplex,id=${id}-codec1,bus=${id}.0,cad=1$audiodev",
        );
    } else {
        die "unkown audio device '$model', implement me!";
    }

    push @devs, '-audiodev', "$audio->{backend},id=$audio->{backend_id}";

    return \@devs;
}
3058
# Get the runtime file paths used for the swtpm instance of VM $vmid.
#
# Returns a hash reference with the keys 'socket' and 'pid'.
sub get_tpm_paths {
    my ($vmid) = @_;

    my $socket_path = "/var/run/qemu-server/$vmid.swtpm";

    return {
        socket => $socket_path,
        pid => "$socket_path.pid",
    };
}
3066
# Append the QEMU arguments for a TPM device to @$devices.
#
# Nothing is added if $conf has no 'tpmstate0' option. Otherwise a socket chardev pointing
# to the socket path from get_tpm_paths(), an emulator TPM backend using it and a 'tpm-tis'
# device are added.
sub add_tpm_device {
    my ($vmid, $devices, $conf) = @_;

    return unless $conf->{tpmstate0};

    my $socket = get_tpm_paths($vmid)->{socket};

    push @$devices,
        "-chardev", "socket,id=tpmchar,path=$socket",
        "-tpmdev", "emulator,id=tpmdev,chardev=tpmchar",
        "-device", "tpm-tis,tpmdev=tpmdev";
}
3078
# Start the swtpm emulator for the TPM state volume of VM $vmid.
#
# Parameters:
#   $storecfg  - storage configuration
#   $vmid      - ID of the VM
#   $tpmdrive  - value of the 'tpmstate0' option; nothing is done (and returned) if unset
#   $migration - if true, swtpm_setup is skipped because the state comes from the remote side
#
# Returns the PID read from the PID file of the swtpm daemon, so that it can be killed on
# error, or undef if the PID file does not contain a number.
# Dies if one of the commands fails or if the PID file does not show up in time.
sub start_swtpm {
    my ($storecfg, $vmid, $tpmdrive, $migration) = @_;

    return if !$tpmdrive;

    my $state;
    my $tpm = parse_drive("tpmstate0", $tpmdrive);
    my ($storeid, $volname) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
    if ($storeid) {
        $state = PVE::Storage::map_volume($storecfg, $tpm->{file});
    } else {
        $state = $tpm->{file};
    }

    my $paths = get_tpm_paths($vmid);

    my $is_tpm2 = defined($tpm->{version}) && $tpm->{version} eq 'v2.0';

    # during migration, we will get state from remote
    #
    if (!$migration) {
        # run swtpm_setup to create a new TPM state if it doesn't exist yet
        my $setup_cmd = [
            "swtpm_setup",
            "--tpmstate",
            "file://$state",
            "--createek",
            "--create-ek-cert",
            "--create-platform-cert",
            "--lock-nvram",
            "--config",
            "/etc/swtpm_setup.conf", # do not use XDG configs
            "--runas",
            "0", # force creation as root, error if not possible
            "--not-overwrite", # ignore existing state, do not modify
        ];

        push @$setup_cmd, "--tpm2" if $is_tpm2;
        # TPM 2.0 supports ECC crypto, use if possible
        push @$setup_cmd, "--ecc" if $is_tpm2;

        # the output line is passed as argument, do not rely on regex capture variables of
        # the calling code
        run_command($setup_cmd, outfunc => sub {
            my ($line) = @_;
            print "swtpm_setup: $line\n";
        });
    }

    my $emulator_cmd = [
        "swtpm",
        "socket",
        "--tpmstate",
        "backend-uri=file://$state,mode=0600",
        "--ctrl",
        "type=unixio,path=$paths->{socket},mode=0600",
        "--pid",
        "file=$paths->{pid}",
        "--terminate", # terminate on QEMU disconnect
        "--daemon",
    ];
    push @$emulator_cmd, "--tpm2" if $is_tpm2;
    run_command($emulator_cmd, outfunc => sub {
        my ($line) = @_;
        print "$line\n";
    });

    my $tries = 100; # swtpm may take a bit to start before daemonizing, wait up to 5s for pid
    while (! -e $paths->{pid}) {
        die "failed to start swtpm: pid file '$paths->{pid}' wasn't created.\n" if --$tries == 0;
        usleep(50_000);
    }

    # return untainted PID of swtpm daemon so it can be killed on error; matching in list
    # context yields undef instead of a stale capture if the file contains no number
    my $pid_line = file_read_firstline($paths->{pid}) // '';
    my ($pid) = $pid_line =~ m/(\d+)/;
    return $pid;
}
3148
# Check if the 'vga' property string $vga selects a qxl (SPICE) display.
#
# Returns 0 if the type is not 'qxl' or 'qxl2'..'qxl4'. Otherwise returns the digit of the
# type, or 1 for plain 'qxl'.
sub vga_conf_has_spice {
    my ($vga) = @_;

    my $vgaconf = parse_vga($vga);
    my $vgatype = $vgaconf->{type};
    return 0 unless $vgatype;

    if ($vgatype =~ m/^qxl([234])?$/) {
        return $1 || 1;
    }

    return 0;
}
3158
# Check if $arch is the architecture reported by get_host_arch().
sub is_native($) {
    my ($arch) = @_;

    my $host_arch = get_host_arch();
    return $host_arch eq $arch;
}
3163
# Get the architecture of a VM: the 'arch' option of $conf if defined, else the result of
# get_host_arch().
sub get_vm_arch {
    my ($conf) = @_;

    my $arch = $conf->{arch};
    return $arch if defined($arch);

    return get_host_arch();
}
3168
# Default (unversioned) QEMU machine type per architecture. Used as fallback by
# get_vm_machine() and as the machine of the temporary VM started in
# query_supported_cpu_flags().
my $default_machines = {
    x86_64 => 'pc',
    aarch64 => 'virt',
};
3173
# Reduce a QEMU version string to its 'major.minor' part.
#
# $kvmversion - version string to parse; if undefined, kvm_user_version() is
#               queried instead.
#
# Returns the 'major.minor' prefix (e.g. '6.1' for '6.1.0'), or undef if no
# version is available or it does not start with 'major.minor'.
sub get_installed_machine_version {
    my ($kvmversion) = @_;

    $kvmversion = kvm_user_version() if !defined($kvmversion);

    # Take the capture from the match's list result instead of reading $1
    # afterwards: after a failed match $1 still holds the capture of some
    # earlier, unrelated match and would be returned as bogus version.
    my ($major_minor) = ($kvmversion // '') =~ m/^(\d+\.\d+)/;

    return $major_minor;
}
3180
# Turn an unversioned machine type into a version-pinned one.
#
# $machine      - 'pc', 'q35', 'virt' or empty (treated like 'pc')
# $base_version - preferred version to pin to; only used if it is defined and
#                 can_run_pve_machine_version() accepts it for $kvmversion
# $kvmversion   - QEMU version used for that check and for the fallback
#
# If $base_version cannot be used, the installed 'major.minor' version is used
# as pin instead. Unknown machine types are returned untouched, with a warning.
sub windows_get_pinned_machine_version {
    my ($machine, $base_version, $kvmversion) = @_;

    my $base_usable = defined($base_version)
        && PVE::QemuServer::Machine::can_run_pve_machine_version($base_version, $kvmversion);

    my $pin_version = $base_usable
        ? $base_version
        : get_installed_machine_version($kvmversion);

    my $versioned_prefix = {
        pc => 'pc-i440fx',
        q35 => 'pc-q35',
        virt => 'virt',
    };

    # an unset machine type behaves like 'pc'
    my $prefix = $versioned_prefix->{ $machine || 'pc' };
    return "$prefix-$pin_version" if defined($prefix);

    warn "unknown machine type '$machine', not touching that!\n";
    return $machine;
}
3202
# Determine the machine type string to use for a VM.
#
# $conf            - VM configuration ('machine' and 'ostype' are read)
# $forcemachine    - if set, takes precedence over $conf->{machine}
# $arch            - architecture used to pick the default machine, defaults
#                    to 'x86_64'
# $add_pve_version - if true, make sure the result carries a '+pveN' suffix
# $kvmversion      - QEMU version, queried via kvm_user_version() if needed
sub get_vm_machine {
    my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_;

    my $machine = $forcemachine || $conf->{machine};

    my $is_unversioned = !$machine || $machine =~ m/^(?:pc|q35|virt)$/;
    if ($is_unversioned) {
        $kvmversion //= kvm_user_version();

        # we must pin Windows VMs without a specific version to 5.1, as 5.2 fixed a bug in ACPI
        # layout which confuses windows quite a bit and may result in various regressions..
        # see: https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg08484.html
        $machine = windows_get_pinned_machine_version($machine, '5.1', $kvmversion)
            if windows_version($conf->{ostype});

        $arch //= 'x86_64';
        $machine ||= $default_machines->{$arch};

        $machine .= '+pve' . PVE::QemuServer::Machine::get_pve_version($kvmversion)
            if $add_pve_version;
    }

    # done if no pve-version is wanted or one is present already
    return $machine if !$add_pve_version || $machine =~ m/\+pve\d+?(?:\.pxe)?$/;

    # a '.pxe' suffix has to stay at the very end, so take it off temporarily
    my $pxe_suffix = '';
    if ($machine =~ m/^(.*?)\.pxe$/) {
        $machine = $1;
        $pxe_suffix = '.pxe';
    }

    # for version-pinned machines that do not include a pve-version (e.g.
    # pc-q35-4.1), we assume 0 to keep them stable in case we bump
    return "${machine}+pve0${pxe_suffix}";
}
3237
# Look up the OVMF firmware image list for a VM.
#
# $arch    - guest architecture, key into the $OVMF table
# $efidisk - parsed efidisk drive (may be undef); 'efitype' and
#            'pre-enrolled-keys' are read
# $smm     - whether SMM is available; selects between the '4m' and
#            '4m-no-smm' variants for 4m EFI disks
#
# Returns the elements of the matching $OVMF entry as a list. Dies if the
# architecture or the selected image type is not present in the table.
sub get_ovmf_files($$$) {
    my ($arch, $efidisk, $smm) = @_;

    my $types = $OVMF->{$arch}
        or die "no OVMF images known for architecture '$arch'\n";

    my $type = 'default';
    if (defined($efidisk->{efitype}) && $efidisk->{efitype} eq '4m') {
        $type = $smm ? "4m" : "4m-no-smm";
        $type .= '-ms' if $efidisk->{'pre-enrolled-keys'};
    }

    # Fail with a usable message rather than with "Can't use an undefined
    # value as an ARRAY reference" if this architecture lacks the variant.
    my $files = $types->{$type}
        or die "no OVMF images of type '$type' known for architecture '$arch'\n";

    return $files->@*;
}
3252
# Emulator binary to use for each architecture when it is not the host's own.
my $Arch2Qemu = {
    aarch64 => '/usr/bin/qemu-system-aarch64',
    x86_64 => '/usr/bin/qemu-system-x86_64',
};

# Return the path of the QEMU binary for $arch: '/usr/bin/kvm' for the native
# architecture, the matching qemu-system-* binary otherwise. Dies for
# architectures without a known emulator.
sub get_command_for_arch($) {
    my ($arch) = @_;

    if (is_native($arch)) {
        return '/usr/bin/kvm';
    }

    my $emulator = $Arch2Qemu->{$arch};
    die "don't know how to emulate architecture '$arch'\n" if !$emulator;

    return $emulator;
}
3265
3266# To use query_supported_cpu_flags and query_understood_cpu_flags to get flags
3267# to use in a QEMU command line (-cpu element), first array_intersect the result
3268# of query_supported_ with query_understood_. This is necessary because:
3269#
3270# a) query_understood_ returns flags the host cannot use and
3271# b) query_supported_ (rather the QMP call) doesn't actually return CPU
3272# flags, but CPU settings - with most of them being flags. Those settings
3273# (and some flags, curiously) cannot be specified as a "-cpu" argument.
3274#
3275# query_supported_ needs to start up to 2 temporary VMs and is therefore rather
3276# expensive. If you need the value returned from this, you can get it much
3277# cheaper from pmxcfs using PVE::Cluster::get_node_kv('cpuflags-$accel') with
3278# $accel being 'kvm' or 'tcg'.
3279#
3280# pvestatd calls this function on startup and whenever the QEMU/KVM version
3281# changes, automatically populating pmxcfs.
3282#
3283# Returns: { kvm => [ flagX, flagY, ... ], tcg => [ flag1, flag2, ... ] }
3284# since kvm and tcg machines support different flags
3285#
# Query the CPU flags QEMU reports as supported for the 'host' CPU model, once with TCG and,
# if KVM is available, once with KVM.
#
# $arch - architecture to query, defaults to get_host_arch(); dies for 'aarch64'
#
# Returns a hash ref with the keys 'tcg' and (only if KVM is available) 'kvm'. Each value is a
# sorted array ref of flag names, or undef if the respective query failed (a warning is
# printed in that case).
sub query_supported_cpu_flags {
    my ($arch) = @_;

    $arch //= get_host_arch();
    my $default_machine = $default_machines->{$arch};

    my $flags = {};

    # FIXME: Once this is merged, the code below should work for ARM as well:
    # https://lists.nongnu.org/archive/html/qemu-devel/2019-06/msg04947.html
    die "QEMU/KVM cannot detect CPU flags on ARM (aarch64)\n" if
        $arch eq "aarch64";

    my $kvm_supported = defined(kvm_version());
    my $qemu_cmd = get_command_for_arch($arch);
    my $fakevmid = -1;
    my $pidfile = PVE::QemuServer::Helpers::pidfile_name($fakevmid);

    # Start a temporary (frozen) VM with vmid -1 to allow sending a QMP command
    my $query_supported_run_qemu = sub {
        my ($kvm) = @_;

        # used as a set; intentionally shadows the outer $flags result hash
        my $flags = {};
        my $cmd = [
            $qemu_cmd,
            '-machine', $default_machine,
            '-display', 'none',
            '-chardev', "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server=on,wait=off",
            '-mon', 'chardev=qmp,mode=control',
            '-pidfile', $pidfile,
            '-S', '-daemonize'
        ];

        if (!$kvm) {
            push @$cmd, '-accel', 'tcg';
        }

        # noerr: a failure is reported through the return code, which is checked right below
        my $rc = run_command($cmd, noerr => 1, quiet => 0);
        die "QEMU flag querying VM exited with code " . $rc if $rc;

        # errors are collected here and only re-thrown after the temporary VM got stopped
        eval {
            my $cmd_result = mon_cmd(
                $fakevmid,
                'query-cpu-model-expansion',
                type => 'full',
                model => { name => 'host' }
            );

            my $props = $cmd_result->{model}->{props};
            foreach my $prop (keys %$props) {
                # only properties whose value is '1' are recorded
                next if $props->{$prop} ne '1';
                # QEMU returns some flags multiple times, with '_', '.' or '-'
                # (e.g. lahf_lm and lahf-lm; sse4.2, sse4-2 and sse4_2; ...).
                # We only keep those with underscores, to match /proc/cpuinfo
                $prop =~ s/\.|-/_/g;
                $flags->{$prop} = 1;
            }
        };
        my $err = $@;

        # force stop with 10 sec timeout and 'nocheck', always stop, even if QMP failed
        vm_stop(undef, $fakevmid, 1, 1, 10, 0, 1);

        die $err if $err;

        return [ sort keys %$flags ];
    };

    # We need to query QEMU twice, since KVM and TCG have different supported flags
    # Both runs happen inside lock_config for the fake VMID.
    PVE::QemuConfig->lock_config($fakevmid, sub {
        $flags->{tcg} = eval { $query_supported_run_qemu->(0) };
        warn "warning: failed querying supported tcg flags: $@\n" if $@;

        if ($kvm_supported) {
            $flags->{kvm} = eval { $query_supported_run_qemu->(1) };
            warn "warning: failed querying supported kvm flags: $@\n" if $@;
        }
    });

    return $flags;
}
3367
# Understood CPU flags are written to a file at 'pve-qemu' compile time
my $understood_cpu_flag_dir = "/usr/share/kvm";

# Read the list of CPU flags QEMU understands for the host architecture.
#
# Returns an array ref with the whitespace separated entries of the flags
# file; dies if no such file exists for the host architecture.
sub query_understood_cpu_flags {
    my $arch = get_host_arch();
    my $filepath = "$understood_cpu_flag_dir/recognized-CPUID-flags-$arch";

    if (! -e $filepath) {
        die "Cannot query understood QEMU CPU flags for architecture: $arch (file not found)\n";
    }

    my $content = file_get_contents($filepath);

    # trim first, so that split cannot produce an empty leading entry
    $content =~ s/^\s+|\s+$//g;

    return [ split(/\s+/, $content) ];
}
3383
# CPU weight of a VM: the configured 'cpuunits' if defined, otherwise a default
# that depends on the cgroup mode (100 for mode 2, 1024 otherwise).
my sub get_cpuunits {
    my ($conf) = @_;

    my $configured = $conf->{cpuunits};
    return $configured if defined($configured);

    return PVE::CGroup::cgroup_mode() == 2 ? 100 : 1024;
}
3388
# Since commit 277d33454f77ec1d1e0bc04e37621e4dd2424b67 in pve-qemu, smm is not off by default
# anymore. But smm=off seems to be required when using SeaBIOS and serial display.
#
# True if the VM uses SeaBIOS (explicitly, or because no bios is configured) and its display
# type is a serial terminal or 'none'.
my sub should_disable_smm {
    my ($conf, $vga) = @_;

    my $bios = $conf->{bios};
    my $uses_seabios = !defined($bios) || $bios eq 'seabios';

    return $uses_seabios
        && $vga->{type}
        && $vga->{type} =~ m/^(serial\d+|none)$/;
}
3397
# Build the QEMU command line for a VM from its configuration.
#
# Parameters:
#   $storecfg     - storage configuration, handed to PVE::Storage and the drive helpers
#   $vmid         - ID of the VM
#   $conf         - VM configuration hash
#   $defaults     - fallback values; 'tablet', 'tdf' and 'scsihw' are read from it
#   $forcemachine - optional machine type, takes precedence over $conf->{machine}
#   $forcecpu     - optional string passed verbatim as '-cpu' argument
#   $pbs_backing  - optional hash of PBS backing configs, keyed by drive name
#
# Returns ($cmd, $vollist, $spice_port) in list context and only $cmd in scalar context.
# $cmd is an array ref with the full command, $vollist an array ref of the volume IDs the
# VM uses, $spice_port is only defined if a SPICE display is configured.
#
# Dies if the installed QEMU is too old for the requested machine type, if KVM is requested
# but not available, or if the configuration refers to missing devices.
#
# Note: this is not a pure function. With OVMF and no efidisk it copies the EFI vars
# template to /tmp/$vmid-ovmf.fd, and it calls add_tpm_device() and next_spice_port().
sub config_to_command {
    my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu,
        $pbs_backing) = @_;

    # $cmd gets the general options; device related options are collected in $devices and
    # appended to $cmd near the end, followed by the joined -rtc/-machine/-global flags
    my $cmd = [];
    my ($globalFlags, $machineFlags, $rtcFlags) = ([], [], []);
    my $devices = [];
    my $bridges = {};
    my $ostype = $conf->{ostype};
    my $winversion = windows_version($ostype);
    my $kvm = $conf->{kvm};
    my $nodename = nodename();

    my $arch = get_vm_arch($conf);
    my $kvm_binary = get_command_for_arch($arch);
    my $kvmver = kvm_user_version($kvm_binary);

    if (!$kvmver || $kvmver !~ m/^(\d+)\.(\d+)/ || $1 < 3) {
        $kvmver //= "undefined";
        die "Detected old QEMU binary ('$kvmver', at least 3.0 is required)\n";
    }

    my $add_pve_version = min_version($kvmver, 4, 1);

    my $machine_type = get_vm_machine($conf, $forcemachine, $arch, $add_pve_version);
    my $machine_version = extract_version($machine_type, $kvmver);
    # KVM acceleration is the default when guest and host architecture match
    $kvm //= 1 if is_native($arch);

    $machine_version =~ m/(\d+)\.(\d+)/;
    my ($machine_major, $machine_minor) = ($1, $2);

    # a third version component >= 90 marks a release candidate
    if ($kvmver =~ m/^\d+\.\d+\.(\d+)/ && $1 >= 90) {
        warn "warning: Installed QEMU version ($kvmver) is a release candidate, ignoring version checks\n";
    } elsif (!min_version($kvmver, $machine_major, $machine_minor)) {
        die "Installed QEMU version '$kvmver' is too old to run machine type '$machine_type',"
            ." please upgrade node '$nodename'\n"
    } elsif (!PVE::QemuServer::Machine::can_run_pve_machine_version($machine_version, $kvmver)) {
        my $max_pve_version = PVE::QemuServer::Machine::get_pve_version($machine_version);
        die "Installed qemu-server (max feature level for $machine_major.$machine_minor is"
            ." pve$max_pve_version) is too old to run machine type '$machine_type', please upgrade"
            ." node '$nodename'\n";
    }

    # if a specific +pve version is required for a feature, use $version_guard
    # instead of min_version to allow machines to be run with the minimum
    # required version
    my $required_pve_version = 0;
    my $version_guard = sub {
        my ($major, $minor, $pve) = @_;
        return 0 if !min_version($machine_version, $major, $minor, $pve);
        my $max_pve = PVE::QemuServer::Machine::get_pve_version("$major.$minor");
        return 1 if min_version($machine_version, $major, $minor, $max_pve+1);
        # remember the highest +pve level any used feature asked for
        $required_pve_version = $pve if $pve && $pve > $required_pve_version;
        return 1;
    };

    if ($kvm && !defined kvm_version()) {
        die "KVM virtualisation configured, but not available. Either disable in VM configuration"
            ." or enable in BIOS.\n";
    }

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
    my $use_old_bios_files = undef;
    ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);

    # NOTE(review): $cpuunits is not used again within this function - confirm it is still needed
    my $cpuunits = get_cpuunits($conf);

    push @$cmd, $kvm_binary;

    push @$cmd, '-id', $vmid;

    my $vmname = $conf->{name} || "vm$vmid";

    push @$cmd, '-name', $vmname;

    push @$cmd, '-no-shutdown';

    # NOTE(review): $use_virtio is set below for virtio disks/NICs but not read in this
    # function - confirm
    my $use_virtio = 0;

    my $qmpsocket = PVE::QemuServer::Helpers::qmp_socket($vmid);
    push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server=on,wait=off";
    push @$cmd, '-mon', "chardev=qmp,mode=control";

    # second QMP monitor, connected to the qmeventd socket
    if (min_version($machine_version, 2, 12)) {
        push @$cmd, '-chardev', "socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5";
        push @$cmd, '-mon', "chardev=qmp-event,mode=control";
    }

    push @$cmd, '-pidfile' , PVE::QemuServer::Helpers::pidfile_name($vmid);

    push @$cmd, '-daemonize';

    if ($conf->{smbios1}) {
        my $smbios_conf = parse_smbios1($conf->{smbios1});
        if ($smbios_conf->{base64}) {
            # Do not pass base64 flag to qemu
            delete $smbios_conf->{base64};
            my $smbios_string = "";
            foreach my $key (keys %$smbios_conf) {
                my $value;
                # the uuid is the only value that is not decoded
                if ($key eq "uuid") {
                    $value = $smbios_conf->{uuid}
                } else {
                    $value = decode_base64($smbios_conf->{$key});
                }
                # qemu accepts any binary data, only commas need escaping by double comma
                $value =~ s/,/,,/g;
                $smbios_string .= "," . $key . "=" . $value if $value;
            }
            push @$cmd, '-smbios', "type=1" . $smbios_string;
        } else {
            push @$cmd, '-smbios', "type=1,$conf->{smbios1}";
        }
    }

    # UEFI firmware: one read-only pflash drive for the code, one for the variables
    if ($conf->{bios} && $conf->{bios} eq 'ovmf') {
        my $d;
        if (my $efidisk = $conf->{efidisk0}) {
            $d = parse_drive('efidisk0', $efidisk);
        }

        my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch, $d, $q35);
        die "uefi base image '$ovmf_code' not found\n" if ! -f $ovmf_code;

        my ($path, $format);
        my $read_only_str = '';
        if ($d) {
            my ($storeid, $volname) = PVE::Storage::parse_volume_id($d->{file}, 1);
            $format = $d->{format};
            if ($storeid) {
                $path = PVE::Storage::path($storecfg, $d->{file});
                if (!defined($format)) {
                    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
                    $format = qemu_img_format($scfg, $volname);
                }
            } else {
                # not a storage volume: use the value as path, the format cannot be derived
                $path = $d->{file};
                die "efidisk format must be specified\n"
                    if !defined($format);
            }

            $read_only_str = ',readonly=on' if drive_is_read_only($conf, $d);
        } else {
            warn "no efidisk configured! Using temporary efivars disk.\n";
            $path = "/tmp/$vmid-ovmf.fd";
            PVE::Tools::file_copy($ovmf_vars, $path, -s $ovmf_vars);
            $format = 'raw';
        }

        my $size_str = "";

        # for raw images, pass the size of the vars template explicitly
        if ($format eq 'raw' && $version_guard->(4, 1, 2)) {
            $size_str = ",size=" . (-s $ovmf_vars);
        }

        # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
        my $cache = "";
        if ($path =~ m/^rbd:/) {
            $cache = ',cache=writeback';
            $path .= ':rbd_cache_policy=writeback'; # avoid write-around, we *need* to cache writes too
        }

        push @$cmd, '-drive', "if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code";
        push @$cmd, '-drive', "if=pflash,unit=1$cache,format=$format,id=drive-efidisk0$size_str,file=${path}${read_only_str}";
    }

    if ($q35) { # tell QEMU to load q35 config early
        # we use different pcie-port hardware for qemu >= 4.0 for passthrough
        if (min_version($machine_version, 4, 0)) {
            push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35-4.0.cfg';
        } else {
            push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35.cfg';
        }
    }

    if (defined(my $fixups = qemu_created_version_fixups($conf, $forcemachine, $kvmver))) {
        push @$cmd, $fixups->@*;
    }

    if ($conf->{vmgenid}) {
        push @$devices, '-device', 'vmgenid,guid='.$conf->{vmgenid};
    }

    # add usb controllers
    my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers(
        $conf, $bridges, $arch, $machine_type, $usbdesc->{format}, $MAX_USB_DEVICES);
    push @$devices, @usbcontrollers if @usbcontrollers;
    my $vga = parse_vga($conf->{vga});

    # all qxl variants (qxl, qxl2..qxl4) are handled as type 'qxl', $qxlnum keeps the head count
    my $qxlnum = vga_conf_has_spice($conf->{vga});
    $vga->{type} = 'qxl' if $qxlnum;

    # pick a default display type if none is configured
    if (!$vga->{type}) {
        if ($arch eq 'aarch64') {
            $vga->{type} = 'virtio';
        } elsif (min_version($machine_version, 2, 9)) {
            $vga->{type} = (!$winversion || $winversion >= 6) ? 'std' : 'cirrus';
        } else {
            $vga->{type} = ($winversion >= 6) ? 'std' : 'cirrus';
        }
    }

    # enable absolute mouse coordinates (needed by vnc)
    my $tablet = $conf->{tablet};
    if (!defined($tablet)) {
        $tablet = $defaults->{tablet};
        $tablet = 0 if $qxlnum; # disable for spice because it is not needed
        $tablet = 0 if $vga->{type} =~ m/^serial\d+$/; # disable if we use serial terminal (no vga card)
    }

    if ($tablet) {
        push @$devices, '-device', print_tabletdevice_full($conf, $arch) if $tablet;
        my $kbd = print_keyboarddevice_full($conf, $arch);
        push @$devices, '-device', $kbd if defined($kbd);
    }

    my $bootorder = device_bootorder($conf);

    # host pci device passthrough
    my ($kvm_off, $gpu_passthrough, $legacy_igd) = PVE::QemuServer::PCI::print_hostpci_devices(
        $vmid, $conf, $devices, $vga, $winversion, $q35, $bridges, $arch, $machine_type, $bootorder);

    # usb devices
    my $usb_dev_features = {};
    $usb_dev_features->{spice_usb3} = 1 if min_version($machine_version, 4, 0);

    my @usbdevices = PVE::QemuServer::USB::get_usb_devices(
        $conf, $usbdesc->{format}, $MAX_USB_DEVICES, $usb_dev_features, $bootorder);
    push @$devices, @usbdevices if @usbdevices;

    # serial devices
    for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
        my $path = $conf->{"serial$i"} or next;
        if ($path eq 'socket') {
            my $socket = "/var/run/qemu-server/${vmid}.serial$i";
            push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server=on,wait=off";
            # On aarch64, serial0 is the UART device. Qemu only allows
            # connecting UART devices via the '-serial' command line, as
            # the device has a fixed slot on the hardware...
            if ($arch eq 'aarch64' && $i == 0) {
                push @$devices, '-serial', "chardev:serial$i";
            } else {
                push @$devices, '-device', "isa-serial,chardev=serial$i";
            }
        } else {
            # anything else must be an existing character device on the host
            die "no such serial device\n" if ! -c $path;
            push @$devices, '-chardev', "tty,id=serial$i,path=$path";
            push @$devices, '-device', "isa-serial,chardev=serial$i";
        }
    }

    # parallel devices
    for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
        if (my $path = $conf->{"parallel$i"}) {
            die "no such parallel device\n" if ! -c $path;
            # paths below /dev/usb/lp use a 'tty' chardev, all others 'parport'
            my $devtype = $path =~ m!^/dev/usb/lp! ? 'tty' : 'parport';
            push @$devices, '-chardev', "$devtype,id=parallel$i,path=$path";
            push @$devices, '-device', "isa-parallel,chardev=parallel$i";
        }
    }

    if (min_version($machine_version, 4, 0) && (my $audio = conf_has_audio($conf))) {
        my $audiopciaddr = print_pci_addr("audio0", $bridges, $arch, $machine_type);
        my $audio_devs = audio_devs($audio, $audiopciaddr, $machine_version);
        push @$devices, @$audio_devs;
    }

    add_tpm_device($vmid, $devices, $conf);

    my $sockets = 1;
    $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer used
    $sockets = $conf->{sockets} if $conf->{sockets};

    my $cores = $conf->{cores} || 1;

    my $maxcpus = $sockets * $cores;

    my $vcpus = $conf->{vcpus} ? $conf->{vcpus} : $maxcpus;

    my $allowed_vcpus = $cpuinfo->{cpus};

    die "MAX $allowed_vcpus vcpus allowed per VM on this node\n" if ($allowed_vcpus < $maxcpus);

    if ($hotplug_features->{cpu} && min_version($machine_version, 2, 7)) {
        # with CPU hotplug, start with a single CPU and add the remaining ones as devices
        push @$cmd, '-smp', "1,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
        for (my $i = 2; $i <= $vcpus; $i++) {
            my $cpustr = print_cpu_device($conf,$i);
            push @$cmd, '-device', $cpustr;
        }

    } else {

        push @$cmd, '-smp', "$vcpus,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
    }
    push @$cmd, '-nodefaults';

    push @$cmd, '-boot', "menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg";

    push @$cmd, '-no-acpi' if defined($conf->{acpi}) && $conf->{acpi} == 0;

    push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;

    # a real display gets a VGA device plus a VNC socket, serial/none run without graphics
    if ($vga->{type} && $vga->{type} !~ m/^serial\d+$/ && $vga->{type} ne 'none'){
        push @$devices, '-device', print_vga_device(
            $conf, $vga, $arch, $machine_version, $machine_type, undef, $qxlnum, $bridges);
        my $socket = PVE::QemuServer::Helpers::vnc_socket($vmid);
        push @$cmd, '-vnc', "unix:$socket,password=on";
    } else {
        push @$cmd, '-vga', 'none' if $vga->{type} eq 'none';
        push @$cmd, '-nographic';
    }

    # time drift fix
    my $tdf = defined($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};
    my $useLocaltime = $conf->{localtime};

    if ($winversion >= 5) { # windows
        $useLocaltime = 1 if !defined($conf->{localtime});

        # use time drift fix when acpi is enabled
        if (!(defined($conf->{acpi}) && $conf->{acpi} == 0)) {
            $tdf = 1 if !defined($conf->{tdf});
        }
    }

    if ($winversion >= 6) {
        push @$globalFlags, 'kvm-pit.lost_tick_policy=discard';
        push @$cmd, '-no-hpet';
    }

    push @$rtcFlags, 'driftfix=slew' if $tdf;

    # an explicit start date takes precedence over localtime
    if ($conf->{startdate} && $conf->{startdate} ne 'now') {
        push @$rtcFlags, "base=$conf->{startdate}";
    } elsif ($useLocaltime) {
        push @$rtcFlags, 'base=localtime';
    }

    if ($forcecpu) {
        push @$cmd, '-cpu', $forcecpu;
    } else {
        push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
    }

    PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd);

    push @$cmd, '-S' if $conf->{freeze};

    push @$cmd, '-k', $conf->{keyboard} if defined($conf->{keyboard});

    my $guest_agent = parse_guest_agent($conf);

    if ($guest_agent->{enabled}) {
        my $qgasocket = PVE::QemuServer::Helpers::qmp_socket($vmid, 1);
        push @$devices, '-chardev', "socket,path=$qgasocket,server=on,wait=off,id=qga0";

        # the agent channel is a virtio serial port by default, or an ISA serial port
        if (!$guest_agent->{type} || $guest_agent->{type} eq 'virtio') {
            my $pciaddr = print_pci_addr("qga0", $bridges, $arch, $machine_type);
            push @$devices, '-device', "virtio-serial,id=qga0$pciaddr";
            push @$devices, '-device', 'virtserialport,chardev=qga0,name=org.qemu.guest_agent.0';
        } elsif ($guest_agent->{type} eq 'isa') {
            push @$devices, '-device', "isa-serial,chardev=qga0";
        }
    }

    my $rng = $conf->{rng0} ? parse_rng($conf->{rng0}) : undef;
    if ($rng && $version_guard->(4, 1, 2)) {
        check_rng_source($rng->{source});

        my $max_bytes = $rng->{max_bytes} // $rng_fmt->{max_bytes}->{default};
        my $period = $rng->{period} // $rng_fmt->{period}->{default};
        my $limiter_str = "";
        # a max_bytes of 0 means no rate limit is passed at all
        if ($max_bytes) {
            $limiter_str = ",max-bytes=$max_bytes,period=$period";
        }

        my $rng_addr = print_pci_addr("rng0", $bridges, $arch, $machine_type);
        push @$devices, '-object', "rng-random,filename=$rng->{source},id=rng0";
        push @$devices, '-device', "virtio-rng-pci,rng=rng0$limiter_str$rng_addr";
    }

    my $spice_port;

    if ($qxlnum) {
        if ($qxlnum > 1) {
            if ($winversion){
                # windows: one additional device per extra head
                for (my $i = 1; $i < $qxlnum; $i++){
                    push @$devices, '-device', print_vga_device(
                        $conf, $vga, $arch, $machine_version, $machine_type, $i, $qxlnum, $bridges);
                }
            } else {
                # assume other OS works like Linux
                my ($ram, $vram) = ("134217728", "67108864");
                if ($vga->{memory}) {
                    $ram = PVE::Tools::convert_size($qxlnum*4*$vga->{memory}, 'mb' => 'b');
                    $vram = PVE::Tools::convert_size($qxlnum*2*$vga->{memory}, 'mb' => 'b');
                }
                push @$cmd, '-global', "qxl-vga.ram_size=$ram";
                push @$cmd, '-global', "qxl-vga.vram_size=$vram";
            }
        }

        my $pciaddr = print_pci_addr("spice", $bridges, $arch, $machine_type);

        # resolve 'localhost' in the address family of the node name
        my $pfamily = PVE::Tools::get_host_address_family($nodename);
        my @nodeaddrs = PVE::Tools::getaddrinfo_all('localhost', family => $pfamily);
        die "failed to get an ip address of type $pfamily for 'localhost'\n" if !@nodeaddrs;

        push @$devices, '-device', "virtio-serial,id=spice$pciaddr";
        push @$devices, '-chardev', "spicevmc,id=vdagent,name=vdagent";
        push @$devices, '-device', "virtserialport,chardev=vdagent,name=com.redhat.spice.0";

        my $localhost = PVE::Network::addr_to_ip($nodeaddrs[0]->{addr});
        $spice_port = PVE::Tools::next_spice_port($pfamily, $localhost);

        my $spice_enhancement_str = $conf->{spice_enhancements} // '';
        my $spice_enhancement = parse_property_string($spice_enhancements_fmt, $spice_enhancement_str);
        if ($spice_enhancement->{foldersharing}) {
            push @$devices, '-chardev', "spiceport,id=foldershare,name=org.spice-space.webdav.0";
            push @$devices, '-device', "virtserialport,chardev=foldershare,name=org.spice-space.webdav.0";
        }

        my $spice_opts = "tls-port=${spice_port},addr=$localhost,tls-ciphers=HIGH,seamless-migration=on";
        $spice_opts .= ",streaming-video=$spice_enhancement->{videostreaming}"
            if $spice_enhancement->{videostreaming};

        push @$devices, '-spice', "$spice_opts";
    }

    # enable balloon by default, unless explicitly disabled
    if (!defined($conf->{balloon}) || $conf->{balloon}) {
        my $pciaddr = print_pci_addr("balloon0", $bridges, $arch, $machine_type);
        push @$devices, '-device', "virtio-balloon-pci,id=balloon0$pciaddr";
    }

    if ($conf->{watchdog}) {
        my $wdopts = parse_watchdog($conf->{watchdog});
        my $pciaddr = print_pci_addr("watchdog", $bridges, $arch, $machine_type);
        my $watchdog = $wdopts->{model} || 'i6300esb';
        push @$devices, '-device', "$watchdog$pciaddr";
        push @$devices, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
    }

    my $vollist = [];
    # controllers that were already added, so each one is only emitted once
    my $scsicontroller = {};
    my $ahcicontroller = {};
    my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : $defaults->{scsihw};

    # Add iscsi initiator name if available
    if (my $initiator = get_initiator_name()) {
        push @$devices, '-iscsi', "initiator-name=$initiator";
    }

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        # only volumes managed by a storage end up in $vollist
        if (PVE::Storage::parse_volume_id($drive->{file}, 1)) {
            check_volume_storage_type($storecfg, $drive->{file});
            push @$vollist, $drive->{file};
        }

        # ignore efidisk here, already added in bios/fw handling code above
        return if $drive->{interface} eq 'efidisk';
        # similar for TPM
        return if $drive->{interface} eq 'tpmstate';

        $use_virtio = 1 if $ds =~ m/^virtio/;

        $drive->{bootindex} = $bootorder->{$ds} if $bootorder->{$ds};

        if ($drive->{interface} eq 'virtio'){
            push @$cmd, '-object', "iothread,id=iothread-$ds" if $drive->{iothread};
        }

        if ($drive->{interface} eq 'scsi') {

            my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);

            die "scsi$drive->{index}: machine version 4.1~pve2 or higher is required to use more than 14 SCSI disks\n"
                if $drive->{index} > 13 && !&$version_guard(4, 1, 2);

            my $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges, $arch, $machine_type);
            my $scsihw_type = $scsihw =~ m/^virtio-scsi-single/ ? "virtio-scsi-pci" : $scsihw;

            my $iothread = '';
            if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{iothread}){
                $iothread .= ",iothread=iothread-$controller_prefix$controller";
                push @$cmd, '-object', "iothread,id=iothread-$controller_prefix$controller";
            } elsif ($drive->{iothread}) {
                warn "iothread is only valid with virtio disk or virtio-scsi-single controller, ignoring\n";
            }

            my $queues = '';
            if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{queues}){
                $queues = ",num_queues=$drive->{queues}";
            }

            push @$devices, '-device', "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues"
                if !$scsicontroller->{$controller};
            $scsicontroller->{$controller}=1;
        }

        if ($drive->{interface} eq 'sata') {
            my $controller = int($drive->{index} / $PVE::QemuServer::Drive::MAX_SATA_DISKS);
            my $pciaddr = print_pci_addr("ahci$controller", $bridges, $arch, $machine_type);
            push @$devices, '-device', "ahci,id=ahci$controller,multifunction=on$pciaddr"
                if !$ahcicontroller->{$controller};
            $ahcicontroller->{$controller}=1;
        }

        # optional PBS backing for this drive, added as separate blockdev
        my $pbs_conf = $pbs_backing->{$ds};
        my $pbs_name = undef;
        if ($pbs_conf) {
            $pbs_name = "drive-$ds-pbs";
            push @$devices, '-blockdev', print_pbs_blockdev($pbs_conf, $pbs_name);
        }

        my $drive_cmd = print_drive_commandline_full(
            $storecfg, $vmid, $drive, $pbs_name, min_version($kvmver, 6, 0));

        # extra protection for templates, but SATA and IDE don't support it..
        $drive_cmd .= ',readonly=on' if drive_is_read_only($conf, $drive);

        push @$devices, '-drive',$drive_cmd;
        push @$devices, '-device', print_drivedevice_full(
            $storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type);
    });

    # network devices: one -netdev/-device pair per configured netX
    for (my $i = 0; $i < $MAX_NETS; $i++) {
        my $netname = "net$i";

        next if !$conf->{$netname};
        my $d = parse_net($conf->{$netname});
        next if !$d;

        $use_virtio = 1 if $d->{model} eq 'virtio';

        $d->{bootindex} = $bootorder->{$netname} if $bootorder->{$netname};

        my $netdevfull = print_netdev_full($vmid, $conf, $arch, $d, $netname);
        push @$devices, '-netdev', $netdevfull;

        my $netdevicefull = print_netdevice_full(
            $vmid, $conf, $d, $netname, $bridges, $use_old_bios_files, $arch, $machine_type);

        push @$devices, '-device', $netdevicefull;
    }

    if ($conf->{ivshmem}) {
        my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});

        my $bus;
        if ($q35) {
            $bus = print_pcie_addr("ivshmem");
        } else {
            $bus = print_pci_addr("ivshmem", $bridges, $arch, $machine_type);
        }

        my $ivshmem_name = $ivshmem->{name} // $vmid;
        my $path = '/dev/shm/pve-shm-' . $ivshmem_name;

        # NOTE(review): the device string ends with a trailing ',' - confirm this is intended
        push @$devices, '-device', "ivshmem-plain,memdev=ivshmem$bus,";
        push @$devices, '-object', "memory-backend-file,id=ivshmem,share=on,mem-path=$path"
            .",size=$ivshmem->{size}M";
    }

    # pci.4 is nested in pci.1
    $bridges->{1} = 1 if $bridges->{4};

    if (!$q35) { # add pci bridges
        if (min_version($machine_version, 2, 3)) {
            $bridges->{1} = 1;
            $bridges->{2} = 1;
        }
        $bridges->{3} = 1 if $scsihw =~ m/^virtio-scsi-single/;
    }

    # NOTE(review): the sort below compares bridge ids as strings - confirm that ids >= 10
    # cannot occur, or that their relative order does not matter
    for my $k (sort {$b cmp $a} keys %$bridges) {
        next if $q35 && $k < 4; # q35.cfg already includes bridges up to 3

        my $k_name = $k;
        if ($k == 2 && $legacy_igd) {
            $k_name = "$k-igd";
        }
        my $pciaddr = print_pci_addr("pci.$k_name", undef, $arch, $machine_type);
        my $devstr = "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr";

        if ($q35) { # add after -readconfig pve-q35.cfg
            splice @$devices, 2, 0, '-device', $devstr;
        } else {
            unshift @$devices, '-device', $devstr if $k > 0;
        }
    }

    if (!$kvm) {
        push @$machineFlags, 'accel=tcg';
    }

    push @$machineFlags, 'smm=off' if should_disable_smm($conf, $vga);

    # request only the +pve level that the features used above actually need
    my $machine_type_min = $machine_type;
    if ($add_pve_version) {
        $machine_type_min =~ s/\+pve\d+$//;
        $machine_type_min .= "+pve$required_pve_version";
    }
    push @$machineFlags, "type=${machine_type_min}";

    push @$cmd, @$devices;
    push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
    push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
    push @$cmd, '-global', join(',', @$globalFlags) if scalar(@$globalFlags);

    if (my $vmstate = $conf->{vmstate}) {
        my $statepath = PVE::Storage::path($storecfg, $vmstate);
        push @$vollist, $vmstate;
        push @$cmd, '-loadstate', $statepath;
        print "activating and using '$vmstate' as vmstate\n";
    }

    if (PVE::QemuConfig->is_template($conf)) {
        # needed to workaround base volumes being read-only
        push @$cmd, '-snapshot';
    }

    # add custom args
    if ($conf->{args}) {
        my $aa = PVE::Tools::split_args($conf->{args});
        push @$cmd, @$aa;
    }

    return wantarray ? ($cmd, $vollist, $spice_port) : $cmd;
}
4031
# Sanity check the source file of a VirtIO RNG device before the VM is started.
#
# Dies if $source does not exist, or if it is '/dev/hwrng' while the host has no
# hardware RNG selected (rng_current reads 'none').
sub check_rng_source {
    my ($source) = @_;

    # mostly relevant for /dev/hwrng, but doesn't hurt to check others too
    -e $source
        or die "cannot create VirtIO RNG device: source file '$source' doesn't exist\n";

    my $rng_current = '/sys/devices/virtual/misc/hw_random/rng_current';
    my $hwrng_unbacked = $source eq '/dev/hwrng' && file_read_firstline($rng_current) eq 'none';

    # Needs to abort, otherwise QEMU crashes on first rng access. Note that rng_current cannot
    # be changed to 'none' manually, so once the VM is past this point, it's no longer an issue.
    die "Cannot start VM with passed-through RNG device: '/dev/hwrng' exists, but"
        ." '$rng_current' is set to 'none'. Ensure that a compatible hardware-RNG is attached"
        ." to the host.\n"
        if $hwrng_unbacked;
}
4048
# Ask the running VM for its SPICE port via 'query-spice'.
#
# Returns the TLS port if one is reported, otherwise the plain port.
# Dies when neither is available.
sub spice_port {
    my ($vmid) = @_;

    my $spice_info = mon_cmd($vmid, 'query-spice');

    my $port = $spice_info->{'tls-port'} || $spice_info->{'port'};
    die "no spice port\n" if !$port;

    return $port;
}
4056
# Collect the IDs of the devices currently present in the running VM.
#
# Queries the monitor for PCI devices (descending into PCI bridges), block
# drives, the HID tablet and USB peripherals.
#
# Returns a hash reference keyed by device ID with true values. For a PCI
# bridge that has an ID, the value is 1 plus the number of devices directly
# behind it.
sub vm_devices_list {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, 'query-pci');
    my $devices_to_check = [];
    my $devices = {};
    foreach my $pcibus (@$res) {
        push @$devices_to_check, @{$pcibus->{devices}};
    }

    # walk the PCI topology level by level, bridges contain further devices
    while (@$devices_to_check) {
        my $to_check = [];
        for my $d (@$devices_to_check) {
            $devices->{$d->{'qdev_id'}} = 1 if $d->{'qdev_id'};

            # a bridge without anything plugged behind it may come without a
            # 'devices' list; dereferencing that would die under 'use strict'
            my $bridge = $d->{'pci_bridge'};
            next if !$bridge || !$bridge->{devices};

            $devices->{$d->{'qdev_id'}} += scalar(@{$bridge->{devices}});
            push @$to_check, @{$bridge->{devices}};
        }
        $devices_to_check = $to_check;
    }

    # block devices are named 'drive-<id>', record them under <id>
    my $resblock = mon_cmd($vmid, 'query-block');
    foreach my $block (@$resblock) {
        if ($block->{device} =~ m/^drive-(\S+)/) {
            $devices->{$1} = 1;
        }
    }

    my $resmice = mon_cmd($vmid, 'query-mice');
    foreach my $mice (@$resmice) {
        if ($mice->{name} eq 'QEMU HID Tablet') {
            $devices->{tablet} = 1;
            last;
        }
    }

    # for usb devices there is no query-usb
    # but we can iterate over the entries in
    # qom-list path=/machine/peripheral
    my $resperipheral = mon_cmd($vmid, 'qom-list', path => '/machine/peripheral');
    foreach my $per (@$resperipheral) {
        if ($per->{name} =~ m/^usb\d+$/) {
            $devices->{$per->{name}} = 1;
        }
    }

    return $devices;
}
4106
# Hot-plug a single device into a running VM.
#
# Parameters:
#   $storecfg     - storage configuration, passed on to the drive helpers
#   $conf         - VM configuration hash
#   $vmid         - ID of the VM
#   $deviceid     - ID of the device to plug ('tablet', 'keyboard', 'virtioN',
#                   'scsiN', 'virtioscsiN'/'scsihwN', 'netN' or 'pci.N')
#   $device       - parsed device description (drive, net, ...) for $deviceid
#   $arch         - guest architecture, passed on to the print_* helpers
#   $machine_type - machine type, passed on to the print_* helpers
#
# Returns 1 on success or if the device is already present. Returns an empty
# value if adding the netdev backend for a 'netN' device reports failure.
# Dies for USB devices, for unsupported device IDs and on plug errors.
sub vm_deviceplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # nothing to do if the device is already there
    my $devices_list = vm_devices_list($vmid);
    return 1 if defined($devices_list->{$deviceid});

    # add PCI bridge if we need it for the device
    qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid, $arch, $machine_type);

    if ($deviceid eq 'tablet') {
        qemu_deviceadd($vmid, print_tabletdevice_full($conf, $arch));
    } elsif ($deviceid eq 'keyboard') {
        qemu_deviceadd($vmid, print_keyboarddevice_full($conf, $arch));
    } elsif ($deviceid =~ m/^usb(\d+)$/) {
        die "usb hotplug currently not reliable\n";
        # since we can't reliably hot unplug all added usb devices and usb
        # passthrough breaks live migration we disable usb hotplugging for now
        #qemu_deviceadd($vmid, PVE::QemuServer::USB::print_usbdevice_full($conf, $deviceid, $device));
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        # order matters: iothread object, then drive backend, then the device
        qemu_iothread_add($vmid, $deviceid, $device);

        qemu_driveadd($storecfg, $vmid, $device);
        my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);

        qemu_deviceadd($vmid, $devicefull);
        eval { qemu_deviceaddverify($vmid, $deviceid); };
        if (my $err = $@) {
            # roll back the drive backend, but report the original error
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        # SCSI controller; the controller model defaults to "lsi"
        my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : "lsi";
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $scsihw_type = $scsihw eq 'virtio-scsi-single' ? "virtio-scsi-pci" : $scsihw;

        my $devicefull = "$scsihw_type,id=$deviceid$pciaddr";

        if($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{iothread}) {
            qemu_iothread_add($vmid, $deviceid, $device);
            $devicefull .= ",iothread=iothread-$deviceid";
        }

        if($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{queues}) {
            $devicefull .= ",num_queues=$device->{queues}";
        }

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        # SCSI disk: make sure its controller exists before adding the drive
        qemu_findorcreatescsihw($storecfg,$conf, $vmid, $device, $arch, $machine_type);
        qemu_driveadd($storecfg, $vmid, $device);

        my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);
        eval { qemu_deviceadd($vmid, $devicefull); };
        if (my $err = $@) {
            # roll back the drive backend, but report the original error
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        return if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid);

        # NOTE: this lexical deliberately shadows the $machine_type parameter
        # for the rest of this branch
        my $machine_type = PVE::QemuServer::Machine::qemu_machine_pxe($vmid, $conf);
        my $use_old_bios_files = undef;
        ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);

        my $netdevicefull = print_netdevice_full(
            $vmid, $conf, $device, $deviceid, undef, $use_old_bios_files, $arch, $machine_type);
        qemu_deviceadd($vmid, $netdevicefull);
        eval {
            qemu_deviceaddverify($vmid, $deviceid);
            qemu_set_link_status($vmid, $deviceid, !$device->{link_down});
        };
        if (my $err = $@) {
            # roll back the netdev backend, but report the original error
            eval { qemu_netdevdel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif (!$q35 && $deviceid =~ m/^(pci\.)(\d+)$/) {
        # PCI bridges are only hot-plugged on non-q35 machines
        my $bridgeid = $2;
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $devicefull = "pci-bridge,id=pci.$bridgeid,chassis_nr=$bridgeid$pciaddr";

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);
    } else {
        die "can't hotplug device '$deviceid'\n";
    }

    return 1;
}
4201
4202# fixme: this should raise exceptions on error!
# Hot-unplug a single device from a running VM.
#
# Parameters:
#   $vmid     - ID of the VM
#   $conf     - VM configuration hash (used to look up boot disks and drives)
#   $deviceid - ID of the device to remove
#
# Returns 1 on success or if the device is not present at all. Dies when asked
# to unplug a boot disk, a USB device or an unsupported device ID.
sub vm_deviceunplug {
    my ($vmid, $conf, $deviceid) = @_;

    # nothing to do if the device is not there
    my $devices_list = vm_devices_list($vmid);
    return 1 if !defined($devices_list->{$deviceid});

    my $bootdisks = PVE::QemuServer::Drive::get_bootdisks($conf);
    die "can't unplug bootdisk '$deviceid'\n" if grep {$_ eq $deviceid} @$bootdisks;

    if ($deviceid eq 'tablet' || $deviceid eq 'keyboard') {
        qemu_devicedel($vmid, $deviceid);
    } elsif ($deviceid =~ m/^usb\d+$/) {
        die "usb hotplug currently not reliable\n";
        # when unplugging usb devices this way, there may be remaining usb
        # controllers/hubs so we disable it for now
        #qemu_devicedel($vmid, $deviceid);
        #qemu_devicedelverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        my $device = parse_drive($deviceid, $conf->{$deviceid});

        # order matters: device first, then its drive backend and iothread
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_iothread_del($vmid, $deviceid, $device);
    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        my $device = parse_drive($deviceid, $conf->{$deviceid});

        # remove the disk, its drive backend and, if possible, its controller
        qemu_devicedel($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_deletescsihw($conf, $vmid, $deviceid);

        # with 'virtio-scsi-single' the iothread is named after the controller
        qemu_iothread_del($vmid, "virtioscsi$device->{index}", $device)
            if $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_netdevdel($vmid, $deviceid);
    } else {
        die "can't unplug device '$deviceid'\n";
    }

    return 1;
}
4249
# Issue a 'device_add' monitor command.
#
# $devicefull is a "<driver>,key=value,..." string; it is split on '=' and ','
# into the option list handed to the monitor, with the leading driver name
# stored under the 'driver' key.
sub qemu_deviceadd {
    my ($vmid, $devicefull) = @_;

    my %device_options = split(/[=,]/, "driver=" . $devicefull);

    return mon_cmd($vmid, "device_add", %device_options);
}
4258
# Issue a 'device_del' monitor command for $deviceid and return its result.
sub qemu_devicedel {
    my ($vmid, $deviceid) = @_;

    my $result = mon_cmd($vmid, "device_del", id => $deviceid);

    return $result;
}
4264
# Create the 'iothread-<deviceid>' object for a device that has the iothread
# option set, unless the VM already has such an object.
sub qemu_iothread_add {
    my ($vmid, $deviceid, $device) = @_;

    my $object_id = "iothread-$deviceid";

    # the iothread list is only queried when the option is enabled
    if ($device->{iothread} && !vm_iothreads_list($vmid)->{$object_id}) {
        qemu_objectadd($vmid, $object_id, "iothread");
    }
}
4273
# Remove the 'iothread-<deviceid>' object of a device that has the iothread
# option set, if the VM currently has such an object.
sub qemu_iothread_del {
    my ($vmid, $deviceid, $device) = @_;

    my $object_id = "iothread-$deviceid";

    # the iothread list is only queried when the option is enabled
    if ($device->{iothread} && vm_iothreads_list($vmid)->{$object_id}) {
        qemu_objectdel($vmid, $object_id);
    }
}
4282
# Issue an 'object-add' monitor command creating object $objectid of QOM type
# $qomtype. Returns 1.
sub qemu_objectadd {
    my ($vmid, $objectid, $qomtype) = @_;

    my @arguments = (id => $objectid, "qom-type" => $qomtype);
    mon_cmd($vmid, "object-add", @arguments);

    return 1;
}
4290
# Issue an 'object-del' monitor command removing object $objectid. Returns 1.
sub qemu_objectdel {
    my ($vmid, $objectid) = @_;

    my @arguments = (id => $objectid);
    mon_cmd($vmid, "object-del", @arguments);

    return 1;
}
4298
# Add the drive backend for $device to a running VM via the HMP 'drive_add'
# command.
#
# Returns 1 if the monitor output contains "OK", dies with the monitor output
# otherwise.
sub qemu_driveadd {
    my ($storecfg, $vmid, $device) = @_;

    # io_uring is only requested when the running QEMU is at least 6.0
    my $io_uring = min_version(get_running_qemu_version($vmid), 6, 0);

    my $drive_spec = print_drive_commandline_full($storecfg, $vmid, $device, undef, $io_uring);
    # double all backslashes, as the spec is embedded in a quoted HMP argument
    $drive_spec =~ s/\\/\\\\/g;

    my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_add auto \"$drive_spec\"");

    # If the command succeeds qemu prints: "OK"
    if ($output =~ m/OK/s) {
        return 1;
    }

    die "adding drive failed: $output\n";
}
4313
# Remove the drive backend 'drive-<deviceid>' from a running VM via the HMP
# 'drive_del' command.
#
# Returns 1 on success, dies with the monitor output on any other result.
sub qemu_drivedel {
    my ($vmid, $deviceid) = @_;

    my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_del drive-$deviceid");
    $output =~ s/^\s+//;

    # Empty output means success.
    # NB: device not found errors mean the drive was auto-deleted and we ignore the error
    if ($output eq "" || $output =~ m/Device \'.*?\' not found/s) {
        return 1;
    }

    die "deleting drive $deviceid failed : $output\n";
}
4327
# Wait until $deviceid shows up in the device list of the VM.
#
# Checks up to six times, sleeping one second after each unsuccessful check.
# Returns 1 once the device is present, dies if it never appears.
sub qemu_deviceaddverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        if (defined($present->{$deviceid})) {
            return 1;
        }
        sleep 1;
    }

    die "error on hotplug device '$deviceid'\n";
}
4339
4340
# Wait until $deviceid has vanished from the device list of the VM.
#
# device_del is asynchronous and its empty return value is not a reliable
# success indicator, so the device list is polled instead: up to six checks,
# sleeping one second after each unsuccessful one.
# Returns 1 once the device is gone, dies if it stays present.
sub qemu_devicedelverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        if (!defined($present->{$deviceid})) {
            return 1;
        }
        sleep 1;
    }

    die "error on hot-unplugging device '$deviceid'\n";
}
4355
# Make sure the SCSI controller responsible for $device exists in the running
# VM, hot-plugging it when it is missing. Returns 1.
sub qemu_findorcreatescsihw {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my (undef, $controller, $controller_prefix) = scsihw_infos($conf, $device);
    my $controller_id = "$controller_prefix$controller";

    my $present = vm_devices_list($vmid);
    vm_deviceplug($storecfg, $conf, $vmid, $controller_id, $device, $arch, $machine_type)
        if !defined($present->{$controller_id});

    return 1;
}
4370
# Remove the SCSI controller belonging to the drive stored under config key
# $opt, provided it is no longer needed. Returns 1.
#
# With 'virtio-scsi-single' the controller 'virtioscsi<index>' of that drive is
# always unplugged. Otherwise 'scsihw<controller>' is only unplugged when no
# present SCSI drive has an index below the computed limit.
sub qemu_deletescsihw {
    my ($conf, $vmid, $opt) = @_;

    my $device = parse_drive($opt, $conf->{$opt});

    my $single_controller = $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
    if ($single_controller) {
        vm_deviceunplug($vmid, $conf, "virtioscsi$device->{index}");
        return 1;
    }

    my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $device);
    my $index_limit = ($maxdev-1)*($controller+1);

    # keep the controller while a SCSI drive below the limit is still present
    my $present = vm_devices_list($vmid);
    for my $id (keys %{$present}) {
        next if !is_valid_drivename($id);

        my $drive = parse_drive($id, $conf->{$id});
        return 1 if $drive->{interface} eq 'scsi' && $drive->{index} < $index_limit;
    }

    vm_deviceunplug($vmid, $conf, "scsihw$controller");

    return 1;
}
4399
# Hot-plug the PCI bridge required by $device, if there is one and it is not
# yet present in the running VM.
#
# Parameters:
#   $storecfg, $conf, $vmid - passed through to vm_deviceplug()
#   $device                 - ID of the device that is about to be plugged
#   $arch, $machine_type    - passed to print_pci_addr() and vm_deviceplug()
#
# Returns 1. No bridge is plugged when print_pci_addr() registers none or only
# a bridge with an ID below 1.
sub qemu_add_pci_bridge {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    # called only for its side effect of recording needed bridges in $bridges
    my $bridges = {};
    print_pci_addr($device, $bridges, $arch, $machine_type);

    # use the last registered bridge ID (undef if none was registered)
    my $bridgeid = (keys %$bridges)[-1];
    return 1 if !defined($bridgeid) || $bridgeid < 1;

    my $bridge = "pci.$bridgeid";
    my $devices_list = vm_devices_list($vmid);

    if (!defined($devices_list->{$bridge})) {
        # vm_deviceplug() expects a $device argument before $arch; a bridge has
        # no device description, so pass an empty one to keep $arch and
        # $machine_type in their proper positions
        vm_deviceplug($storecfg, $conf, $vmid, $bridge, {}, $arch, $machine_type);
    }

    return 1;
}
4423
# Set the link state of network device $device via the 'set_link' monitor
# command; a true $up brings the link up, a false one takes it down.
sub qemu_set_link_status {
    my ($vmid, $device, $up) = @_;

    my $link_state = $up ? JSON::true : JSON::false;

    return mon_cmd($vmid, "set_link", name => $device, up => $link_state);
}
4430
# Add the network backend for $deviceid to a running VM via 'netdev_add'.
#
# The netdev string is split on '=' and ',' into monitor options; 'vhost' is
# converted to a JSON boolean and 'queues' to a number. Returns 1.
sub qemu_netdevadd {
    my ($vmid, $conf, $arch, $device, $deviceid) = @_;

    my $netdev_spec = print_netdev_full($vmid, $conf, $arch, $device, $deviceid, 1);
    my %options = split(/[=,]/, $netdev_spec);

    # the monitor needs typed values instead of the strings from the spec
    $options{vhost} = JSON::boolean(PVE::JSONSchema::parse_boolean($options{vhost}))
        if defined($options{vhost});
    $options{queues} = $options{queues} + 0
        if defined($options{queues});

    mon_cmd($vmid, "netdev_add", %options);

    return 1;
}
4448
# Remove the network backend $deviceid from a running VM via 'netdev_del'.
sub qemu_netdevdel {
    my ($vmid, $deviceid) = @_;

    my @arguments = (id => $deviceid);

    return mon_cmd($vmid, "netdev_del", @arguments);
}
4454
# Replace USB device $deviceid of a running VM with the one described by
# $device: unplug the old device, add an xhci controller if a USB3 device needs
# one and none is present, then plug the new device.
#
# Does nothing when $device is false.
sub qemu_usb_hotplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    return if !$device;

    # remove the old one first
    vm_deviceunplug($vmid, $conf, $deviceid);

    # check if xhci controller is necessary and available
    if ($device->{usb3} && !vm_devices_list($vmid)->{xhci}) {
        my $pciaddr = print_pci_addr("xhci", undef, $arch, $machine_type);
        qemu_deviceadd($vmid, "nec-usb-xhci,id=xhci$pciaddr");
    }

    my $usb_device = parse_usb_device($device->{host});
    $usb_device->{usb3} = $device->{usb3};

    # add the new one
    return vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $usb_device, $arch, $machine_type);
}
4479
# Change the number of vCPUs of a running VM to $vcpus.
#
# Parameters:
#   $vmid  - ID of the VM
#   $conf  - VM configuration hash; 'vcpus' is updated and written back after
#            every successfully plugged or unplugged CPU device
#   $vcpus - requested vCPU count; a false value means the maximum
#            (sockets * cores)
#
# Dies if more vCPUs than the maximum are requested, if the running VM does
# not match its configuration when adding vCPUs, or if unplugging is requested
# on a machine version older than 2.7. Raises a parameter exception when a CPU
# does not (dis)appear within the retry limit.
sub qemu_cpu_hotplug {
    my ($vmid, $conf, $vcpus) = @_;

    my $machine_type = PVE::QemuServer::Machine::get_current_qemu_machine($vmid);

    my $sockets = 1;
    $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer used
    $sockets = $conf->{sockets} if $conf->{sockets};
    my $cores = $conf->{cores} || 1;
    my $maxcpus = $sockets * $cores;

    $vcpus = $maxcpus if !$vcpus;

    die "you can't add more vcpus than maxcpus\n"
        if $vcpus > $maxcpus;

    my $currentvcpus = $conf->{vcpus} || $maxcpus;

    # fewer vCPUs requested than configured: unplug from the highest one down
    if ($vcpus < $currentvcpus) {

        if (PVE::QemuServer::Machine::machine_version($machine_type, 2, 7)) {

            for (my $i = $currentvcpus; $i > $vcpus; $i--) {
                qemu_devicedel($vmid, "cpu$i");
                my $retry = 0;
                my $currentrunningvcpus = undef;
                # poll once per second until the CPU is gone from the guest
                while (1) {
                    $currentrunningvcpus = mon_cmd($vmid, "query-cpus-fast");
                    last if scalar(@{$currentrunningvcpus}) == $i-1;
                    raise_param_exc({ vcpus => "error unplugging cpu$i" }) if $retry > 5;
                    $retry++;
                    sleep 1;
                }
                # update conf after each successful cpu unplug
                $conf->{vcpus} = scalar(@{$currentrunningvcpus});
                PVE::QemuConfig->write_config($vmid, $conf);
            }
        } else {
            die "cpu hot-unplugging requires qemu version 2.7 or higher\n";
        }

        return;
    }

    # adding vCPUs: the running VM has to agree with the configuration first
    my $currentrunningvcpus = mon_cmd($vmid, "query-cpus-fast");
    die "vcpus in running vm does not match its configuration\n"
        if scalar(@{$currentrunningvcpus}) != $currentvcpus;

    if (PVE::QemuServer::Machine::machine_version($machine_type, 2, 7)) {

        for (my $i = $currentvcpus+1; $i <= $vcpus; $i++) {
            my $cpustr = print_cpu_device($conf, $i);
            qemu_deviceadd($vmid, $cpustr);

            my $retry = 0;
            my $currentrunningvcpus = undef;
            # poll once per second until the new CPU shows up in the guest
            while (1) {
                $currentrunningvcpus = mon_cmd($vmid, "query-cpus-fast");
                last if scalar(@{$currentrunningvcpus}) == $i;
                raise_param_exc({ vcpus => "error hotplugging cpu$i" }) if $retry > 10;
                sleep 1;
                $retry++;
            }
            # update conf after each successful cpu hotplug
            $conf->{vcpus} = scalar(@{$currentrunningvcpus});
            PVE::QemuConfig->write_config($vmid, $conf);
        }
    } else {

        # machine versions before 2.7 use the 'cpu-add' monitor command
        for (my $i = $currentvcpus; $i < $vcpus; $i++) {
            mon_cmd($vmid, "cpu-add", id => int($i));
        }
    }
}
4554
# Apply I/O throttling limits to block device $deviceid of a running VM via
# the 'block_set_io_throttle' monitor command. Does nothing if the VM is not
# running.
#
# After $vmid and $deviceid, the limits are expected positionally in this
# order:
#   bps, bps_rd, bps_wr, iops, iops_rd, iops_wr,
#   bps_max, bps_rd_max, bps_wr_max, iops_max, iops_rd_max, iops_wr_max,
#   bps_max_length, bps_rd_max_length, bps_wr_max_length,
#   iops_max_length, iops_rd_max_length, iops_wr_max_length
# Every limit is passed to the monitor as an integer.
sub qemu_block_set_io_throttle {
    my ($vmid, $deviceid, @limits) = @_;

    return if !check_running($vmid);

    # monitor argument names, in the same order as the positional limits
    my @limit_names = qw(
        bps bps_rd bps_wr iops iops_rd iops_wr
        bps_max bps_rd_max bps_wr_max iops_max iops_rd_max iops_wr_max
        bps_max_length bps_rd_max_length bps_wr_max_length
        iops_max_length iops_rd_max_length iops_wr_max_length
    );

    my @throttle_args = map { ($limit_names[$_] => int($limits[$_])) } 0 .. $#limit_names;

    mon_cmd($vmid, "block_set_io_throttle", device => $deviceid, @throttle_args);
}
4586
# Resize volume $volid on the storage layer and, if the VM is running, tell
# QEMU about the new size of block device $deviceid via 'block_resize'.
#
# If the storage layer returns a false value, a size of 0 is passed on to QEMU
# (presumably to let QEMU detect the size itself - verify against
# PVE::Storage::volume_resize). The size sent to QEMU is rounded up to a
# multiple of 1024.
sub qemu_block_resize {
    my ($vmid, $deviceid, $storecfg, $volid, $size) = @_;

    my $is_running = check_running($vmid);

    my $storage_resized = PVE::Storage::volume_resize($storecfg, $volid, $size, $is_running);
    $size = 0 unless $storage_resized;

    return unless $is_running;

    # round up to the next multiple of 1024
    my $remainder = $size % 1024;
    $size = $size + (1024 - $remainder) if $remainder;

    return mon_cmd(
        $vmid,
        "block_resize",
        device => $deviceid,
        size => int($size),
        timeout => 60,
    );
}
4607
# Create snapshot $snap of volume $volid.
#
# For a running VM whose volume is snapshotted by QEMU itself, an internal
# snapshot is taken through the monitor; in all other cases the storage layer
# creates the snapshot.
sub qemu_volume_snapshot {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $via_qemu = check_running($vmid) && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    return mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap)
        if $via_qemu;

    return PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
}
4619
# Delete snapshot $snap of volume $volid.
#
# The volume only counts as "in use by a running VM" if the VM is running and
# the volume is referenced by a drive of its current configuration. In that
# case, and if QEMU handles snapshots for the volume, the snapshot is deleted
# through the monitor; otherwise the storage layer deletes it.
sub qemu_volume_snapshot_delete {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $in_use = check_running($vmid);

    if ($in_use) {
        # a running VM alone is not enough, the volume must be attached too
        $in_use = undef;

        my $current_conf = PVE::QemuConfig->load_config($vmid);
        my $mark_if_attached = sub {
            my ($ds, $drive) = @_;
            $in_use = 1 if $drive->{file} eq $volid;
        };
        PVE::QemuConfig->foreach_volume($current_conf, $mark_if_attached);
    }

    return mon_cmd($vmid, 'blockdev-snapshot-delete-internal-sync', device => $deviceid, name => $snap)
        if $in_use && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    return PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $in_use);
}
4641
# Configure the migration capabilities of a running VM.
#
# Every capability reported by 'query-migrate-capabilities' is set explicitly:
# 'auto-converge' and 'xbzrle' are enabled, 'dirty-bitmaps' is enabled when
# 'query-proxmox-support' reports the matching property ($savevm selects the
# savevm variant instead of the migration one), everything else is disabled.
sub set_migration_caps {
    my ($vmid, $savevm) = @_;

    # errors of this query are ignored on purpose, support is then treated as absent
    my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };

    my $bitmap_prop = 'pbs-dirty-bitmap-migration';
    $bitmap_prop = 'pbs-dirty-bitmap-savevm' if $savevm;

    my %wanted_state = (
        "auto-converge" => 1,
        "xbzrle" => 1,
        "x-rdma-pin-all" => 0,
        "zero-blocks" => 0,
        "compress" => 0,
        "dirty-bitmaps" => ($qemu_support->{$bitmap_prop} ? 1 : 0),
    );

    my $supported = mon_cmd($vmid, "query-migrate-capabilities");

    my $capabilities = [];
    foreach my $entry (@$supported) {
        my $name = $entry->{capability};
        my $state = $wanted_state{$name} ? JSON::true : JSON::false;
        push @$capabilities, { capability => $name, state => $state };
    }

    return mon_cmd($vmid, "migrate-set-capabilities", capabilities => $capabilities);
}
4672
# Call $func once for every distinct volume ID referenced by the VM config
# $conf, including its snapshots, unused volumes and the 'vmstate' key.
#
# $func is called as $func->($volid, $attr, @param), where $attr is a hash
# with the flags gathered for that volume: cdrom, replicate, shared,
# referenced_in_config, referenced_in_snapshot, size, is_vmstate,
# is_tpmstate, is_unused and drivename.
sub foreach_volid {
    my ($conf, $func, @param) = @_;

    my $volhash = {};

    # merge one drive occurrence into the attributes collected for its volume
    my $record_volume = sub {
        my ($key, $drive, $snapname) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $attr = ($volhash->{$volid} //= {});

        # only a cdrom if every occurrence is one
        $attr->{cdrom} //= 1;
        $attr->{cdrom} = 0 if !drive_is_cdrom($drive);

        # replicated if any occurrence allows it (the default)
        $attr->{replicate} //= 0;
        $attr->{replicate} = 1 if ($drive->{replicate} // 1);

        $attr->{shared} //= 0;
        $attr->{shared} = 1 if $drive->{shared};

        $attr->{referenced_in_config} //= 0;
        if (defined($snapname)) {
            $attr->{referenced_in_snapshot}->{$snapname} = 1;
        } else {
            $attr->{referenced_in_config} = 1;
        }

        # the first known size wins
        my $size = $drive->{size};
        $attr->{size} //= $size if $size;

        $attr->{is_vmstate} //= 0;
        $attr->{is_vmstate} = 1 if $key eq 'vmstate';

        $attr->{is_tpmstate} //= 0;
        $attr->{is_tpmstate} = 1 if $key eq 'tpmstate0';

        $attr->{is_unused} //= 0;
        $attr->{is_unused} = 1 if $key =~ /^unused\d+$/;

        $attr->{drivename} = $key if is_valid_drivename($key);
    };

    my $include_opts = {
        extra_keys => ['vmstate'],
        include_unused => 1,
    };

    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $record_volume);

    my $snapshots = $conf->{snapshots};
    for my $snapname (keys %{$conf->{snapshots}}) {
        PVE::QemuConfig->foreach_volume_full(
            $conf->{snapshots}->{$snapname}, $include_opts, $record_volume, $snapname);
    }

    for my $volid (keys %$volhash) {
        $func->($volid, $volhash->{$volid}, @param);
    }
}
4730
# Options whose pending value is committed to the configuration right away by
# vmconfig_hotplug_pending(), without any hotplug action.
my $fast_plug_option = {
    map { $_ => 1 } qw(
        lock
        name
        onboot
        shares
        startup
        description
        protection
        vmstatestorage
        hookscript
        tags
    )
};
4743
# Apply the changes from the [PENDING] section of $conf to a running VM.
#
# Parameters:
#   $vmid      - ID of the VM
#   $conf      - VM configuration hash; applied values are moved from
#                $conf->{pending} into $conf and the result is written back
#   $storecfg  - storage configuration
#   $selection - optional hash used to only apply specified options, for
#                example: { cores => 1 } (only apply changed 'cores')
#   $errors    - hash reference used to return error messages, keyed by option
#
# Options that cannot be hot-plugged are skipped and stay pending.
sub vmconfig_hotplug_pending {
    my ($vmid, $conf, $storecfg, $selection, $errors) = @_;

    my $defaults = load_defaults();
    my $arch = get_vm_arch($conf);
    my $machine_type = get_vm_machine($conf, undef, $arch);

    # commit values which do not have any impact on running VM first
    # Note: those options cannot raise errors, so we do not care about
    # $selection and always apply them.

    my $add_error = sub {
        my ($opt, $msg) = @_;
        $errors->{$opt} = "hotplug problem - $msg";
    };

    my $changes = 0;
    foreach my $opt (keys %{$conf->{pending}}) { # add/change
        if ($fast_plug_option->{$opt}) {
            $conf->{$opt} = $conf->{pending}->{$opt};
            delete $conf->{pending}->{$opt};
            $changes = 1;
        }
    }

    if ($changes) {
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');

    # first pass: options that are pending deletion
    my $cgroup = PVE::QemuServer::CGroup->new($vmid);
    my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    foreach my $opt (sort keys %$pending_delete_hash) {
        next if $selection && !$selection->{$opt};
        my $force = $pending_delete_hash->{$opt}->{force};
        eval {
            if ($opt eq 'hotplug') {
                die "skip\n" if ($conf->{hotplug} =~ /memory/);
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                # deleting the option means falling back to the default
                if ($defaults->{tablet}) {
                    # tablet and keyboard have no device description, so an
                    # empty one is passed to keep $arch and $machine_type in
                    # the positions vm_deviceplug() expects
                    vm_deviceplug($storecfg, $conf, $vmid, 'tablet', {}, $arch, $machine_type);
                    vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', {}, $arch, $machine_type)
                        if $arch eq 'aarch64';
                } else {
                    vm_deviceunplug($vmid, $conf, 'tablet');
                    vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
                }
            } elsif ($opt =~ m/^usb\d+/) {
                die "skip\n";
                # since we cannot reliably hot unplug usb devices we are disabling it
                #die "skip\n" if !$hotplug_features->{usb} || $conf->{$opt} =~ m/spice/i;
                #vm_deviceunplug($vmid, $conf, $opt);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, undef);
            } elsif ($opt eq 'balloon') {
                # enabling the balloon device is not hotpluggable
                die "skip\n" if defined($conf->{balloon}) && $conf->{balloon} == 0;
                # here we reset the ballooning value to memory
                my $balloon = $conf->{memory} || $defaults->{memory};
                mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
            } elsif ($fast_plug_option->{$opt}) {
                # do nothing
            } elsif ($opt =~ m/^net(\d+)$/) {
                die "skip\n" if !$hotplug_features->{network};
                vm_deviceunplug($vmid, $conf, $opt);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if !$hotplug_features->{disk} || $opt =~ m/(ide|sata)(\d+)/;
                vm_deviceunplug($vmid, $conf, $opt);
                vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
            } elsif ($opt =~ m/^memory$/) {
                die "skip\n" if !$hotplug_features->{memory};
                PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt);
            } elsif ($opt eq 'cpuunits') {
                $cgroup->change_cpu_shares(undef, 1024);
            } elsif ($opt eq 'cpulimit') {
                $cgroup->change_cpu_quota(undef, undef); # reset, cgroup module can better decide values
            } else {
                die "skip\n";
            }
        };
        if (my $err = $@) {
            # "skip" only means not hot-pluggable, the option stays pending
            $add_error->($opt, $err) if $err ne "skip\n";
        } else {
            delete $conf->{$opt};
            PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        }
    }

    # Applies all pending cloud-init options and regenerates the cloud-init
    # config with the given drive key/value already in place. Runs at most once.
    my ($apply_pending_cloudinit, $apply_pending_cloudinit_done);
    $apply_pending_cloudinit = sub {
        return if $apply_pending_cloudinit_done; # once is enough
        $apply_pending_cloudinit_done = 1; # once is enough

        my ($key, $value) = @_;

        my @cloudinit_opts = keys %$confdesc_cloudinit;
        foreach my $opt (keys %{$conf->{pending}}) {
            next if !grep { $_ eq $opt } @cloudinit_opts;
            $conf->{$opt} = delete $conf->{pending}->{$opt};
        }

        my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
        foreach my $opt (sort keys %$pending_delete_hash) {
            next if !grep { $_ eq $opt } @cloudinit_opts;
            PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
            delete $conf->{$opt};
        }

        # generate from a shallow copy, $conf itself gets the drive value later
        my $new_conf = { %$conf };
        $new_conf->{$key} = $value;
        PVE::QemuServer::Cloudinit::generate_cloudinitconfig($new_conf, $vmid);
    };

    # second pass: options that are added or changed
    foreach my $opt (keys %{$conf->{pending}}) {
        next if $selection && !$selection->{$opt};
        my $value = $conf->{pending}->{$opt};
        eval {
            if ($opt eq 'hotplug') {
                die "skip\n" if ($value =~ /memory/) || ($value !~ /memory/ && $conf->{hotplug} =~ /memory/);
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                if ($value == 1) {
                    # see above: pass an empty device description
                    vm_deviceplug($storecfg, $conf, $vmid, 'tablet', {}, $arch, $machine_type);
                    vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', {}, $arch, $machine_type)
                        if $arch eq 'aarch64';
                } elsif ($value == 0) {
                    vm_deviceunplug($vmid, $conf, 'tablet');
                    vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
                }
            } elsif ($opt =~ m/^usb\d+$/) {
                die "skip\n";
                # since we cannot reliably hot unplug usb devices we disable it for now
                #die "skip\n" if !$hotplug_features->{usb} || $value =~ m/spice/i;
                #my $d = eval { parse_property_string($usbdesc->{format}, $value) };
                #die "skip\n" if !$d;
                #qemu_usb_hotplug($storecfg, $conf, $vmid, $opt, $d, $arch, $machine_type);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, $value);
            } elsif ($opt eq 'balloon') {
                # enabling/disabling the ballooning device is not hotpluggable
                my $old_balloon_enabled = !!(!defined($conf->{balloon}) || $conf->{balloon});
                my $new_balloon_enabled = !!(!defined($conf->{pending}->{balloon}) || $conf->{pending}->{balloon});
                die "skip\n" if $old_balloon_enabled != $new_balloon_enabled;

                # allow manual ballooning if shares is set to zero
                if ((defined($conf->{shares}) && ($conf->{shares} == 0))) {
                    my $balloon = $conf->{pending}->{balloon} || $conf->{memory} || $defaults->{memory};
                    mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
                }
            } elsif ($opt =~ m/^net(\d+)$/) {
                # some changes can be done without hotplug
                vmconfig_update_net($storecfg, $conf, $hotplug_features->{network},
                    $vmid, $opt, $value, $arch, $machine_type);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if $opt eq 'efidisk0' || $opt eq 'tpmstate0';
                # some changes can be done without hotplug
                my $drive = parse_drive($opt, $value);
                if (drive_is_cloudinit($drive)) {
                    $apply_pending_cloudinit->($opt, $value);
                }
                vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
                    $vmid, $opt, $value, $arch, $machine_type);
            } elsif ($opt =~ m/^memory$/) { #dimms
                die "skip\n" if !$hotplug_features->{memory};
                $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt, $value);
            } elsif ($opt eq 'cpuunits') {
                $cgroup->change_cpu_shares($conf->{pending}->{$opt}, 1024);
            } elsif ($opt eq 'cpulimit') {
                # a limit of 0 means unlimited, which is passed on as -1
                my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000);
                $cgroup->change_cpu_quota($cpulimit, 100000);
            } elsif ($opt eq 'agent') {
                vmconfig_update_agent($conf, $opt, $value);
            } else {
                die "skip\n"; # skip non-hot-pluggable options
            }
        };
        if (my $err = $@) {
            # "skip" only means not hot-pluggable, the option stays pending
            $add_error->($opt, $err) if $err ne "skip\n";
        } else {
            $conf->{$opt} = $value;
            delete $conf->{pending}->{$opt};
        }
    }

    PVE::QemuConfig->write_config($vmid, $conf);
}
4938
# Free the volume behind $drive if that is permitted and safe.
#
# Only acts if $force is set or $key names an unused volume, and the drive is
# not a cdrom. If the VM owns the volume, the caller's 'Datastore.AllocateSpace'
# permission on the storage is checked and the volume is freed, unless it is
# still in use, which is fatal.
#
# Returns 1 if the entry may be removed from the config (volume freed or not
# owned by this VM), an empty value otherwise.
sub try_deallocate_drive {
    my ($storecfg, $vmid, $conf, $key, $drive, $rpcenv, $authuser, $force) = @_;

    return if !($force || $key =~ /^unused/);
    return if drive_is_cdrom($drive, 1);

    my $volid = $drive->{file};

    # If vm is not owner of this disk remove from config
    return 1 if !vm_is_volid_owner($storecfg, $vmid, $volid);

    my $sid = PVE::Storage::parse_volume_id($volid);
    $rpcenv->check($authuser, "/storage/$sid", ['Datastore.AllocateSpace']);

    # check if the disk is really unused
    if (PVE::QemuServer::Drive::is_volume_in_use($storecfg, $conf, $key, $volid)) {
        die "unable to delete '$volid' - volume is still in use (snapshot?)\n";
    }

    PVE::Storage::vdisk_free($storecfg, $volid);

    return 1;
}
4961
# Handle the removal of drive option $opt from the VM config.
#
# With $force the caller needs 'VM.Config.Disk' on the VM and the volume is
# deallocated if possible; without it the volume is registered as an unused
# drive instead.
sub vmconfig_delete_or_detach_drive {
    my ($vmid, $storecfg, $conf, $opt, $force) = @_;

    my $drive = parse_drive($opt, $conf->{$opt});

    my $rpcenv = PVE::RPCEnvironment::get();
    my $authuser = $rpcenv->get_user();

    if (!$force) {
        vmconfig_register_unused_drive($storecfg, $vmid, $conf, $drive);
    } else {
        $rpcenv->check_vm_perm($authuser, $vmid, undef, ['VM.Config.Disk']);
        try_deallocate_drive($storecfg, $vmid, $conf, $opt, $drive, $rpcenv, $authuser, $force);
    }
}
4977
4978
4979
# Apply all pending changes of $conf to the configuration itself (cold plug)
# and write the result back once at the end.
#
# Pending deletions are handled first, then pending additions/changes. Drives
# that get deleted or replaced are detached or deallocated through the drive
# helpers. Failures are stored in $errors, keyed by option, and also emitted
# as warnings; the affected option stays pending.
#
# Returns early, without writing, if nothing is pending.
sub vmconfig_apply_pending {
    my ($vmid, $conf, $storecfg, $errors) = @_;

    return if !scalar(keys %{$conf->{pending}});

    my $add_apply_error = sub {
        my ($opt, $msg) = @_;
        my $err_msg = "unable to apply pending change $opt : $msg";
        $errors->{$opt} = $err_msg;
        warn $err_msg;
    };

    # cold plug

    my $to_delete = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    for my $opt (sort keys %$to_delete) {
        my $force = $to_delete->{$opt}->{force};

        eval {
            die "internal error" if $opt =~ m/^unused/;

            vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force)
                if defined($conf->{$opt}) && is_valid_drivename($opt);
        };
        my $err = $@;

        if ($err) {
            $add_apply_error->($opt, $err);
            next;
        }

        PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        delete $conf->{$opt};
    }

    PVE::QemuConfig->cleanup_pending($conf);

    for my $opt (keys %{$conf->{pending}}) { # add/change
        next if $opt eq 'delete'; # just to be sure

        eval {
            # a drive that gets replaced is kept as an unused volume
            if (defined($conf->{$opt}) && is_valid_drivename($opt)) {
                my $old_drive = parse_drive($opt, $conf->{$opt});
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
            }
        };
        my $err = $@;

        if ($err) {
            $add_apply_error->($opt, $err);
            next;
        }

        $conf->{$opt} = delete $conf->{pending}->{$opt};
    }

    # write all changes at once to avoid unnecessary i/o
    PVE::QemuConfig->write_config($vmid, $conf);
}
5031
# Apply a changed netX option to a running VM.
#
# If only bridge/tag/trunks/firewall/rate/link state differ, the existing tap
# device is reconfigured in place and 1 is returned. Any other difference
# (model, MAC, queues, bridge <-> NAT mode) needs the device to be unplugged
# and plugged again, which is only done when $hotplug is set; otherwise the
# sub dies with "skip\n".
sub vmconfig_update_net {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $newnet = parse_net($value);

    if ($conf->{$opt}) {
        my $oldnet = parse_net($conf->{$opt});

        my $needs_replug =
            safe_string_ne($oldnet->{model}, $newnet->{model})
            || safe_string_ne($oldnet->{macaddr}, $newnet->{macaddr})
            || safe_num_ne($oldnet->{queues}, $newnet->{queues})
            || !($newnet->{bridge} && $oldnet->{bridge}); # bridge/nat mode change

        if (!$needs_replug) {
            my ($nic_index) = $opt =~ m/net(\d+)/;
            die "internal error" if !defined($nic_index);
            my $iface = "tap${vmid}i${nic_index}";

            my $tap_settings_changed =
                safe_string_ne($oldnet->{bridge}, $newnet->{bridge})
                || safe_num_ne($oldnet->{tag}, $newnet->{tag})
                || safe_string_ne($oldnet->{trunks}, $newnet->{trunks})
                || safe_num_ne($oldnet->{firewall}, $newnet->{firewall});

            if ($tap_settings_changed) {
                PVE::Network::tap_unplug($iface);

                if ($have_sdn) {
                    PVE::Network::SDN::Zones::tap_plug($iface, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks}, $newnet->{rate});
                } else {
                    PVE::Network::tap_plug($iface, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks}, $newnet->{rate});
                }
            } elsif (safe_num_ne($oldnet->{rate}, $newnet->{rate})) {
                # Rate can be applied on its own but any change above needs to
                # include the rate in tap_plug since OVS resets everything.
                PVE::Network::tap_rate_limit($iface, $newnet->{rate});
            }

            if (safe_string_ne($oldnet->{link_down}, $newnet->{link_down})) {
                qemu_set_link_status($vmid, $opt, !$newnet->{link_down});
            }

            return 1;
        }

        # not changeable online: hot-unplug first, plug the new device below
        die "skip\n" if !$hotplug;
        vm_deviceunplug($vmid, $conf, $opt);
    }

    die "skip\n" if !$hotplug;
    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet, $arch, $machine_type);
}
5084
# Decide whether a changed guest agent option can be applied without restart.
#
# Dies with "skip\n" when no agent option is currently configured or when any
# sub-option other than the hot-pluggable ones differs between the current
# and the new value. Returns nothing if the change can simply be taken over.
sub vmconfig_update_agent {
    my ($conf, $opt, $value) = @_;

    die "skip\n" if !$conf->{$opt};

    my $hotplug_options = { fstrim_cloned_disks => 1 };

    my $old_agent = parse_guest_agent($conf);
    my $agent = parse_guest_agent({$opt => $value});

    # first pass catches added/changed options, second pass removed ones
    for my $pair ([$agent, $old_agent], [$old_agent, $agent]) {
        my ($lhs, $rhs) = @$pair;
        for my $option (keys %$lhs) {
            next if defined($hotplug_options->{$option});
            die "skip\n" if safe_string_ne($lhs->{$option}, $rhs->{$option});
        }
    }

    return; # either no actual change (e.g., format string reordered) or just hotpluggable changes
}
5107
# Apply a changed drive option to a running VM.
#
# - CD-ROM drives: the medium is ejected and, unless the new file is 'none',
#   replaced; returns 1.
# - Disks with unchanged volume: only I/O throttle limits are applied live
#   (returns 1); changes to discard/iothread/queues/cache/ssd die with
#   "skip\n".
# - Disks with a different volume: the old one is unplugged and registered as
#   unused, then the new one is hot-plugged (requires $hotplug).
# New ide/sata drives are never hot-plugged.
sub vmconfig_update_disk {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $drive = parse_drive($opt, $value);

    if ($conf->{$opt} && (my $old_drive = parse_drive($opt, $conf->{$opt}))) {
        my $media = $drive->{media} || 'disk';
        my $oldmedia = $old_drive->{media} || 'disk';
        die "unable to change media type\n" if $media ne $oldmedia;

        if (drive_is_cdrom($old_drive)) {
            if ($drive->{file} eq 'none') {
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");
                if (drive_is_cloudinit($old_drive)) {
                    vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
                }
            } else {
                my $path = get_iso_path($storecfg, $vmid, $drive->{file});

                # force eject if locked
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");

                if ($path) {
                    mon_cmd($vmid, "blockdev-change-medium", id => "$opt", filename => "$path");
                }
            }

            return 1;
        }

        if ($drive->{file} eq $old_drive->{file}) {
            # same volume: update the existing disk in place

            # these properties cannot be changed while the VM is running
            my @cold_only = qw(discard iothread queues cache ssd);
            die "skip\n"
                if grep { safe_string_ne($drive->{$_}, $old_drive->{$_}) } @cold_only;

            my @throttle_keys = qw(
                mbps mbps_rd mbps_wr
                iops iops_rd iops_wr
                mbps_max mbps_rd_max mbps_wr_max
                iops_max iops_rd_max iops_wr_max
                bps_max_length bps_rd_max_length bps_wr_max_length
                iops_max_length iops_rd_max_length iops_wr_max_length
            );

            if (grep { safe_num_ne($drive->{$_}, $old_drive->{$_}) } @throttle_keys) {
                # unset limits become 0, unset burst lengths become 1
                my $bytes = sub { return ($drive->{$_[0]} || 0)*1024*1024 };
                my $ops = sub { return $drive->{$_[0]} || 0 };
                my $len = sub { return $drive->{$_[0]} || 1 };

                qemu_block_set_io_throttle(
                    $vmid,
                    "drive-$opt",
                    $bytes->('mbps'),
                    $bytes->('mbps_rd'),
                    $bytes->('mbps_wr'),
                    $ops->('iops'),
                    $ops->('iops_rd'),
                    $ops->('iops_wr'),
                    $bytes->('mbps_max'),
                    $bytes->('mbps_rd_max'),
                    $bytes->('mbps_wr_max'),
                    $ops->('iops_max'),
                    $ops->('iops_rd_max'),
                    $ops->('iops_wr_max'),
                    $len->('bps_max_length'),
                    $len->('bps_rd_max_length'),
                    $len->('bps_wr_max_length'),
                    $len->('iops_max_length'),
                    $len->('iops_rd_max_length'),
                    $len->('iops_wr_max_length'),
                );
            }

            return 1;
        }

        # different volume: unplug the old disk and keep it as unused
        die "skip\n" if !$hotplug;

        vm_deviceunplug($vmid, $conf, $opt);
        vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
    }

    die "skip\n" if !$hotplug || $opt =~ m/(ide|sata)(\d+)/;

    # hotplug new disks
    PVE::Storage::activate_volumes($storecfg, [$drive->{file}]) if $drive->{file} !~ m|^/dev/.+|;
    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive, $arch, $machine_type);
}
5215
# called in locked context by incoming migration
#
# Collect the volumes of $conf that live on non-shared storage. CD-ROM drives,
# tpmstate0 and drives without a file are ignored. Returns a hash reference
# mapping the drive key to [volid, storeid, volname, drive, use_existing],
# where use_existing is 1 for volumes listed in $replicated_volumes.
sub vm_migrate_get_nbd_disks {
    my ($storecfg, $conf, $replicated_volumes) = @_;

    my $local_volumes = {};

    my $collect = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);
        return if $ds eq 'tpmstate0';

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid);

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        return if $scfg->{shared};

        # replicated disks re-use existing state via bitmap
        my $use_existing = 0;
        $use_existing = 1 if $replicated_volumes->{$volid};

        $local_volumes->{$ds} = [$volid, $storeid, $volname, $drive, $use_existing];
    };

    PVE::QemuConfig->foreach_volume($conf, $collect);

    return $local_volumes;
}
5242
# called in locked context by incoming migration
#
# Allocate target volumes for the entries produced by
# vm_migrate_get_nbd_disks().
#
# $source_volumes maps a drive key to
# [volid, storeid, volname, drive, use_existing]. Entries flagged
# use_existing are taken over unchanged and marked as replicated. For all
# others a new volume of the drive's size is allocated on the (possibly
# mapped) storage.
#
# Returns a hash reference: drive key => { drivestr, volid [, replicated] }.
# The drive hashes inside $source_volumes are not modified.
sub vm_migrate_alloc_nbd_disks {
    my ($storecfg, $vmid, $source_volumes, $storagemap) = @_;

    my $nbd = {};
    foreach my $opt (sort keys %$source_volumes) {
        my ($volid, $storeid, $volname, $drive, $use_existing) = @{$source_volumes->{$opt}};

        if ($use_existing) {
            $nbd->{$opt}->{drivestr} = print_drive($drive);
            $nbd->{$opt}->{volid} = $volid;
            $nbd->{$opt}->{replicated} = 1;
            next;
        }

        # If a remote storage is specified and the format of the original
        # volume is not available there, fall back to the default format.
        # Otherwise use the same format as the original.
        my $format;
        if (!$storagemap->{identity}) {
            $storeid = PVE::JSONSchema::map_id($storagemap, $storeid);
            my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            my $fileFormat = qemu_img_format($scfg, $volname);
            $format = (grep {$fileFormat eq $_} @{$validFormats}) ? $fileFormat : $defFormat;
        } else {
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $format = qemu_img_format($scfg, $volname);
        }

        my $size = $drive->{size} / 1024;
        my $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $format, undef, $size);

        # work on a copy so the caller's drive definition keeps pointing at
        # the source volume
        my $newdrive = { %$drive };
        $newdrive->{format} = $format;
        $newdrive->{file} = $newvolid;

        $nbd->{$opt}->{drivestr} = print_drive($newdrive);
        $nbd->{$opt}->{volid} = $newvolid;
    }

    return $nbd;
}
5286
# Start a VM while holding its config lock.
#
# see vm_start_nolock for parameters, additionally:
# migrate_opts:
#   storagemap = parsed storage map for allocating NBD disks
sub vm_start {
    my ($storecfg, $vmid, $params, $migrate_opts) = @_;

    my $start_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid, $migrate_opts->{migratedfrom});

        if (!$params->{skiptemplate} && PVE::QemuConfig->is_template($conf)) {
            die "you can't start a vm if it's a template\n";
        }

        my $has_suspended_lock = PVE::QemuConfig->has_lock($conf, 'suspended');
        my $has_backup_lock = PVE::QemuConfig->has_lock($conf, 'backup');

        my $running = check_running($vmid, undef, $migrate_opts->{migratedfrom});

        # a backup is currently running, attempt to start the guest in the
        # existing QEMU instance
        return vm_resume($vmid) if $has_backup_lock && $running;

        if (!$params->{skiplock} && !$has_suspended_lock) {
            PVE::QemuConfig->check_lock($conf);
        }

        $params->{resume} = $has_suspended_lock || defined($conf->{vmstate});

        die "VM $vmid already running\n" if $running;

        if (my $storagemap = $migrate_opts->{storagemap}) {
            my $replicated = $migrate_opts->{replicated_volumes};
            my $disks = vm_migrate_get_nbd_disks($storecfg, $conf, $replicated);
            my $nbd = vm_migrate_alloc_nbd_disks($storecfg, $vmid, $disks, $storagemap);
            $migrate_opts->{nbd} = $nbd;

            # point the config at the freshly allocated target volumes
            for my $opt (keys %$nbd) {
                $conf->{$opt} = $nbd->{$opt}->{drivestr};
            }
        }

        return vm_start_nolock($storecfg, $vmid, $conf, $params, $migrate_opts);
    };

    return PVE::QemuConfig->lock_config($vmid, $start_locked);
}
5330
5331
# Start the QEMU process for a VM. The caller is responsible for locking.
#
# Returns a hash reference that may contain migrate_uri, migrate_storage_uri,
# drives (NBD export info per drive) and spice_port.
#
# params:
#   statefile => 'tcp', 'unix' for migration or path/volid for RAM state
#   skiplock => 0/1, skip checking for config lock
#   skiptemplate => 0/1, skip checking whether VM is template
#   forcemachine => to force Qemu machine (rollback/migration)
#   forcecpu => a QEMU '-cpu' argument string to override get_cpu_options
#   timeout => in seconds
#   paused => start VM in paused state (backup)
#   resume => resume from hibernation
#   pbs-backing => {
#       sata0 => {
#           repository
#           snapshot
#           keyfile
#           archive
#       },
#       virtio2 => ...
#   }
# migrate_opts:
#   nbd => volumes for NBD exports (vm_migrate_alloc_nbd_disks)
#   migratedfrom => source node
#   spice_ticket => used for spice migration, passed via tunnel/stdin
#   network => CIDR of migration network
#   type => secure/insecure - tunnel over encrypted connection or plain-text
#   nbd_proto_version => int, 0 for TCP, 1 for UNIX
#   replicated_volumes => which volids should be re-used with bitmaps for nbd migration
#   tpmstate_vol => new volid of tpmstate0, not yet contained in config
sub vm_start_nolock {
    my ($storecfg, $vmid, $conf, $params, $migrate_opts) = @_;

    my $statefile = $params->{statefile};
    my $resume = $params->{resume};

    my $migratedfrom = $migrate_opts->{migratedfrom};
    my $migration_type = $migrate_opts->{type};

    my $res = {};

    # clean up leftover reboot request files
    eval { clear_reboot_request($vmid); };
    warn $@ if $@;

    if (!$statefile && scalar(keys %{$conf->{pending}})) {
        vmconfig_apply_pending($vmid, $conf, $storecfg);
        $conf = PVE::QemuConfig->load_config($vmid); # update/reload
    }

    # don't regenerate the ISO if the VM is started as part of a live migration
    # this way we can reuse the old ISO with the correct config
    PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid) if !$migratedfrom;

    # override TPM state vol if migrated, conf is out of date still
    if (my $tpmvol = $migrate_opts->{tpmstate_vol}) {
        my $parsed = parse_drive("tpmstate0", $conf->{tpmstate0});
        $parsed->{file} = $tpmvol;
        $conf->{tpmstate0} = print_drive($parsed);
    }

    my $defaults = load_defaults();

    # set environment variable useful inside network script
    $ENV{PVE_MIGRATED_FROM} = $migratedfrom if $migratedfrom;

    PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1);

    my $forcemachine = $params->{forcemachine};
    my $forcecpu = $params->{forcecpu};
    if ($resume) {
        # enforce machine and CPU type on suspended vm to ensure HW compatibility
        $forcemachine = $conf->{runningmachine};
        $forcecpu = $conf->{runningcpu};
        print "Resuming suspended VM\n";
    }

    my ($cmd, $vollist, $spice_port) = config_to_command($storecfg, $vmid,
        $conf, $defaults, $forcemachine, $forcecpu, $params->{'pbs-backing'});

    # Resolve (and cache) the local IP to listen on for migration traffic:
    # taken from the migration network if one is configured, otherwise from
    # the cluster's node IP.
    my $migration_ip;
    my $get_migration_ip = sub {
        my ($nodename) = @_;

        return $migration_ip if defined($migration_ip);

        my $cidr = $migrate_opts->{network};

        if (!defined($cidr)) {
            my $dc_conf = PVE::Cluster::cfs_read_file('datacenter.cfg');
            $cidr = $dc_conf->{migration}->{network};
        }

        if (defined($cidr)) {
            my $ips = PVE::Network::get_local_ip_from_cidr($cidr);

            die "could not get IP: no address configured on local " .
                "node for network '$cidr'\n" if scalar(@$ips) == 0;

            die "could not get IP: multiple addresses configured on local " .
                "node for network '$cidr'\n" if scalar(@$ips) > 1;

            $migration_ip = $ips->[0];
        }

        $migration_ip = PVE::Cluster::remote_node_ip($nodename, 1)
            if !defined($migration_ip);

        return $migration_ip;
    };

    my $migrate_uri;
    if ($statefile) {
        if ($statefile eq 'tcp') {
            my $localip = "localhost";
            my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg');
            my $nodename = nodename();

            if (!defined($migration_type)) {
                if (defined($datacenterconf->{migration}->{type})) {
                    $migration_type = $datacenterconf->{migration}->{type};
                } else {
                    $migration_type = 'secure';
                }
            }

            if ($migration_type eq 'insecure') {
                $localip = $get_migration_ip->($nodename);
                $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
            }

            my $pfamily = PVE::Tools::get_host_address_family($nodename);
            my $migrate_port = PVE::Tools::next_migrate_port($pfamily);
            $migrate_uri = "tcp:${localip}:${migrate_port}";
            push @$cmd, '-incoming', $migrate_uri;
            push @$cmd, '-S';

        } elsif ($statefile eq 'unix') {
            # should be default for secure migrations as a ssh TCP forward
            # tunnel is not deterministically ready and regularly fails
            # to set up in time, so use UNIX socket forwards
            my $socket_addr = "/run/qemu-server/$vmid.migrate";
            unlink $socket_addr;

            $migrate_uri = "unix:$socket_addr";

            push @$cmd, '-incoming', $migrate_uri;
            push @$cmd, '-S';

        } elsif (-e $statefile) {
            push @$cmd, '-loadstate', $statefile;
        } else {
            my $statepath = PVE::Storage::path($storecfg, $statefile);
            push @$vollist, $statefile;
            push @$cmd, '-loadstate', $statepath;
        }
    } elsif ($params->{paused}) {
        push @$cmd, '-S';
    }

    my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume);

    my $pci_devices = {}; # host pci devices
    for (my $i = 0; $i < $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES; $i++) {
        my $dev = $conf->{"hostpci$i"} or next;
        $pci_devices->{$i} = parse_hostpci($dev);
    }

    # do not reserve pciid for mediated devices, sysfs will error out for duplicate assignment
    my $real_pci_devices = [ grep { !(defined($_->{mdev}) && scalar($_->{pciid}->@*) == 1) } values $pci_devices->%* ];

    # map to a flat list of pci ids
    my $pci_id_list = [ map { $_->{id} } map { $_->{pciid}->@* } $real_pci_devices->@* ];

    # reserve all PCI IDs before actually doing anything with them
    PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, $start_timeout);

    eval {
        for my $id (sort keys %$pci_devices) {
            my $d = $pci_devices->{$id};
            for my $dev ($d->{pciid}->@*) {
                PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $id, $d->{mdev});
            }
        }
    };
    if (my $err = $@) {
        eval { PVE::QemuServer::PCI::remove_pci_reservation($pci_id_list) };
        warn $@ if $@;
        die $err;
    }

    PVE::Storage::activate_volumes($storecfg, $vollist);

    eval {
        run_command(['/bin/systemctl', 'stop', "$vmid.scope"], outfunc => sub{}, errfunc => sub{});
    };
    # Issues with the above 'stop' not being fully completed are extremely rare, a very low
    # timeout should be more than enough here...
    PVE::Systemd::wait_for_unit_removed("$vmid.scope", 5);

    my $cpuunits = get_cpuunits($conf);

    my %run_params = (
        timeout => $statefile ? undef : $start_timeout,
        umask => 0077,
        noerr => 1,
    );

    # when migrating, prefix QEMU output so other side can pick up any
    # errors that might occur and show the user
    if ($migratedfrom) {
        $run_params{quiet} = 1;
        $run_params{logfunc} = sub { print "QEMU: $_[0]\n" };
    }

    my %systemd_properties = (
        Slice => 'qemu.slice',
        KillMode => 'process',
        SendSIGKILL => 0,
        TimeoutStopUSec => ULONG_MAX, # infinity
    );

    if (PVE::CGroup::cgroup_mode() == 2) {
        $cpuunits = 10000 if $cpuunits >= 10000; # else we get an error
        $systemd_properties{CPUWeight} = $cpuunits;
    } else {
        $systemd_properties{CPUShares} = $cpuunits;
    }

    if (my $cpulimit = $conf->{cpulimit}) {
        $systemd_properties{CPUQuota} = int($cpulimit * 100);
    }
    $systemd_properties{timeout} = 10 if $statefile; # setting up the scope should be quick

    # Fork, enter the systemd scope and run QEMU (plus swtpm if a TPM state
    # is configured).
    my $run_qemu = sub {
        PVE::Tools::run_fork sub {
            PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);

            my $tpmpid;
            if (my $tpm = $conf->{tpmstate0}) {
                # start the TPM emulator so QEMU can connect on start
                $tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom);
            }

            my $exitcode = run_command($cmd, %run_params);
            if ($exitcode) {
                if ($tpmpid) {
                    warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n";
                    kill 'TERM', $tpmpid;
                }
                die "QEMU exited with code $exitcode\n";
            }
        };
    };

    if ($conf->{hugepages}) {

        my $code = sub {
            my $hugepages_topology = PVE::QemuServer::Memory::hugepages_topology($conf);
            my $hugepages_host_topology = PVE::QemuServer::Memory::hugepages_host_topology();

            PVE::QemuServer::Memory::hugepages_mount();
            PVE::QemuServer::Memory::hugepages_allocate($hugepages_topology, $hugepages_host_topology);

            eval { $run_qemu->() };
            if (my $err = $@) {
                PVE::QemuServer::Memory::hugepages_reset($hugepages_host_topology)
                    if !$conf->{keephugepages};
                die $err;
            }

            PVE::QemuServer::Memory::hugepages_pre_deallocate($hugepages_topology)
                if !$conf->{keephugepages};
        };
        eval { PVE::QemuServer::Memory::hugepages_update_locked($code); };

    } else {
        eval { $run_qemu->() };
    }

    if (my $err = $@) {
        # deactivate volumes and drop the PCI reservation if start fails;
        # cleanup problems are only reported, the start error is what counts
        eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
        warn $@ if $@;
        eval { PVE::QemuServer::PCI::remove_pci_reservation($pci_id_list) };
        warn $@ if $@;

        die "start failed: $err";
    }

    # re-reserve all PCI IDs now that we can know the actual VM PID
    my $pid = PVE::QemuServer::Helpers::vm_running_locally($vmid);
    eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, undef, $pid) };
    warn $@ if $@;

    print "migration listens on $migrate_uri\n" if $migrate_uri;
    $res->{migrate_uri} = $migrate_uri;

    if ($statefile && $statefile ne 'tcp' && $statefile ne 'unix') {
        eval { mon_cmd($vmid, "cont"); };
        warn $@ if $@;
    }

    # start nbd server for storage migration
    if (my $nbd = $migrate_opts->{nbd}) {
        my $nbd_protocol_version = $migrate_opts->{nbd_proto_version} // 0;

        my $migrate_storage_uri;
        # nbd_protocol_version > 0 for unix socket support
        # (the migration type is only defaulted for 'tcp' statefiles above,
        # so it may still be undefined here)
        if ($nbd_protocol_version > 0 && ($migration_type // '') eq 'secure') {
            my $socket_path = "/run/qemu-server/$vmid\_nbd.migrate";
            mon_cmd($vmid, "nbd-server-start", addr => { type => 'unix', data => { path => $socket_path } } );
            $migrate_storage_uri = "nbd:unix:$socket_path";
        } else {
            my $nodename = nodename();
            my $localip = $get_migration_ip->($nodename);
            my $pfamily = PVE::Tools::get_host_address_family($nodename);
            my $storage_migrate_port = PVE::Tools::next_migrate_port($pfamily);

            mon_cmd($vmid, "nbd-server-start", addr => {
                type => 'inet',
                data => {
                    host => "${localip}",
                    port => "${storage_migrate_port}",
                },
            });
            $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
            $migrate_storage_uri = "nbd:${localip}:${storage_migrate_port}";
        }

        $res->{migrate_storage_uri} = $migrate_storage_uri;

        foreach my $opt (sort keys %$nbd) {
            my $drivestr = $nbd->{$opt}->{drivestr};
            my $volid = $nbd->{$opt}->{volid};
            mon_cmd($vmid, "nbd-server-add", device => "drive-$opt", writable => JSON::true );
            my $nbd_uri = "$migrate_storage_uri:exportname=drive-$opt";
            print "storage migration listens on $nbd_uri volume:$drivestr\n";
            print "re-using replicated volume: $opt - $volid\n"
                if $nbd->{$opt}->{replicated};

            $res->{drives}->{$opt} = $nbd->{$opt};
            $res->{drives}->{$opt}->{nbd_uri} = $nbd_uri;
        }
    }

    if ($migratedfrom) {
        eval {
            set_migration_caps($vmid);
        };
        warn $@ if $@;

        if ($spice_port) {
            print "spice listens on port $spice_port\n";
            $res->{spice_port} = $spice_port;
            if ($migrate_opts->{spice_ticket}) {
                mon_cmd($vmid, "set_password", protocol => 'spice', password =>
                    $migrate_opts->{spice_ticket});
                mon_cmd($vmid, "expire_password", protocol => 'spice', time => "+30");
            }
        }

    } else {
        mon_cmd($vmid, "balloon", value => $conf->{balloon}*1024*1024)
            if !$statefile && $conf->{balloon};

        foreach my $opt (keys %$conf) {
            next if $opt !~ m/^net\d+$/;
            my $nicconf = parse_net($conf->{$opt});
            qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down};
        }
    }

    mon_cmd($vmid, 'qom-set',
        path => "machine/peripheral/balloon0",
        property => "guest-stats-polling-interval",
        value => 2) if (!defined($conf->{balloon}) || $conf->{balloon});

    if ($resume) {
        print "Resumed VM, removing state\n";
        if (my $vmstate = $conf->{vmstate}) {
            PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
            PVE::Storage::vdisk_free($storecfg, $vmstate);
        }
        delete $conf->@{qw(lock vmstate runningmachine runningcpu)};
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-start');

    return $res;
}
5719
# Return the QEMU command line for a VM as a single string.
#
# With $snapname the command is built from that snapshot's config (including
# its recorded running machine/CPU overrides); dies if the snapshot does not
# exist.
sub vm_commandline {
    my ($storecfg, $vmid, $snapname) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    my $forcemachine;
    my $forcecpu;

    if ($snapname) {
        my $snapshot = $conf->{snapshots}->{$snapname};
        if (!defined($snapshot)) {
            die "snapshot '$snapname' does not exist\n";
        }

        # check for machine or CPU overrides in snapshot
        ($forcemachine, $forcecpu) = ($snapshot->{runningmachine}, $snapshot->{runningcpu});

        # keep file digest for API
        $snapshot->{digest} = $conf->{digest};

        $conf = $snapshot;
    }

    my $defaults = load_defaults();

    my $cmd = config_to_command($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu);

    return PVE::Tools::cmd2string($cmd);
}
5745
# Send a "system_reset" monitor command to the VM while holding the config
# lock. The config's own lock is checked unless $skiplock is set.
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $reset_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid);

        if (!$skiplock) {
            PVE::QemuConfig->check_lock($conf);
        }

        mon_cmd($vmid, "system_reset");
    };

    return PVE::QemuConfig->lock_config($vmid, $reset_locked);
}
5758
# Return an array reference with all volume IDs referenced by $conf.
# Absolute paths and entries that do not parse as a volume ID are left out.
sub get_vm_volumes {
    my ($conf) = @_;

    my @volids;

    my $collect = sub {
        my ($volid, $attr) = @_;

        return if $volid =~ m|^/|;

        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        push @volids, $volid;
    };

    foreach_volid($conf, $collect);

    return \@volids;
}
5776
# Clean up after a VM has stopped.
#
# Unless $keepActive is set, the VM's volumes are deactivated and the TPM
# state volume is unmapped. Runtime files, the ivshmem file, mediated
# devices and PCI reservations are removed, and pending config changes are
# applied if $apply_pending_changes is set. Errors are only warned about.
sub vm_stop_cleanup {
    my ($storecfg, $vmid, $conf, $keepActive, $apply_pending_changes) = @_;

    eval {
        if (!$keepActive) {
            PVE::Storage::deactivate_volumes($storecfg, get_vm_volumes($conf));

            if (my $tpmdrive = $conf->{tpmstate0}) {
                my $tpm = parse_drive("tpmstate0", $tpmdrive);
                my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
                PVE::Storage::unmap_volume($storecfg, $tpm->{file}) if $storeid;
            }
        }

        # remove the per-VM runtime files
        for my $ext (qw(mon qmp pid vnc qga)) {
            unlink "/var/run/qemu-server/${vmid}.$ext";
        }

        if ($conf->{ivshmem}) {
            my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
            # Just delete it for now. VMs which already have it open are not
            # affected, but new VMs will get a separate one. If this becomes
            # an issue we either add some sort of ref-counting or a
            # "don't delete on stop" flag to the ivshmem format.
            unlink '/dev/shm/pve-shm-' . ($ivshmem->{name} // $vmid);
        }

        my $ids = [];
        for my $key (keys %$conf) {
            my ($hostpciindex) = $key =~ m/^hostpci(\d+)$/;
            next if !defined($hostpciindex);

            my $d = parse_hostpci($conf->{$key});
            my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $hostpciindex);

            for my $pci (@{$d->{pciid}}) {
                my $pciid = $pci->{id};
                push @$ids, $pciid;
                PVE::SysFSTools::pci_cleanup_mdev_device($pciid, $uuid);
            }
        }
        PVE::QemuServer::PCI::remove_pci_reservation($ids);

        if ($apply_pending_changes) {
            vmconfig_apply_pending($vmid, $conf, $storecfg);
        }
    };
    warn $@ if $@; # avoid errors - just warn
}
5827
# call only in locked context
#
# Stop or shut down a running VM and clean up afterwards.
#
# With $shutdown the guest is asked to power down (via the guest agent when
# it is enabled in the config, else via ACPI); otherwise QEMU is told to
# quit. If that fails or does not finish within $timeout seconds (default
# 60), the process gets SIGTERM when $force is set, and SIGKILL after another
# 10 seconds; without $force the sub dies. Returns early if the VM is not
# running.
sub _do_vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive) = @_;

    my $pid = check_running($vmid, $nocheck);
    return if !$pid;

    my $conf;
    if (!$nocheck) {
        $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->check_lock($conf) if !$skiplock;
        if (!defined($timeout) && $shutdown && $conf->{startup}) {
            my $opts = PVE::JSONSchema::pve_parse_startup_order($conf->{startup});
            $timeout = $opts->{down} if $opts->{down};
        }
        PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-stop');
    }

    # poll once per second until the VM is gone or $limit seconds passed;
    # returns the number of seconds waited
    my $wait_for_exit = sub {
        my ($limit) = @_;

        my $waited = 0;
        while (($waited < $limit) && check_running($vmid, $nocheck)) {
            $waited++;
            sleep 1;
        }
        return $waited;
    };

    eval {
        if (!$shutdown) {
            mon_cmd($vmid, "quit");
        } elsif (defined($conf) && get_qga_key($conf, 'enabled')) {
            mon_cmd($vmid, "guest-shutdown", timeout => $timeout);
        } else {
            mon_cmd($vmid, "system_powerdown");
        }
    };
    my $err = $@;

    if ($err) {
        if (!check_running($vmid, $nocheck)) {
            warn "Unexpected: VM shutdown command failed, but VM not running anymore..\n";
            return;
        }

        die "VM quit/powerdown failed\n" if !$force;

        warn "VM quit/powerdown failed - terminating now with SIGTERM\n";
        kill 15, $pid;
    } else {
        $timeout = 60 if !defined($timeout);

        if ($wait_for_exit->($timeout) < $timeout) {
            # stopped in time
            vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
            return;
        }

        die "VM quit/powerdown failed - got timeout\n" if !$force;

        warn "VM still running - terminating now with SIGTERM\n";
        kill 15, $pid;
    }

    # wait again, this time for the SIGTERM to take effect
    my $term_timeout = 10;

    if ($wait_for_exit->($term_timeout) >= $term_timeout) {
        warn "VM still running - terminating now with SIGKILL\n";
        kill 9, $pid;
        sleep 1;
    }

    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
}
5909
# Stop a VM.
#
# Note: use $nocheck to skip tests if VM configuration file exists.
# We need that when migrating VMs to other nodes (files already moved).
# Note: we set $keepActive in vzdump stop mode - volumes need to stay active.
#
# With $migratedfrom the process is sent SIGTERM directly and cleanup runs
# against the config loaded for that node, without applying pending changes.
# Otherwise the stop is done by _do_vm_stop() under the config lock.
sub vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive, $migratedfrom) = @_;

    # a plain stop is forceful unless the caller said otherwise
    if (!defined($force) && !$shutdown) {
        $force = 1;
    }

    if (!$migratedfrom) {
        PVE::QemuConfig->lock_config($vmid, sub {
            _do_vm_stop($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive);
        });
        return;
    }

    my $pid = check_running($vmid, $nocheck, $migratedfrom);
    kill 15, $pid if $pid;

    my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);
    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 0);
    return;
}
5930
# Reboot a VM: register a reboot request and then run the shutdown path of
# _do_vm_stop() while holding the config lock. Does nothing if the VM is not
# running. If anything fails, the reboot request is cleared again and the
# error is re-raised.
sub vm_reboot {
    my ($vmid, $timeout) = @_;

    my $reboot_locked = sub {
        eval {
            # only reboot if running, as qmeventd starts it again on a stop event
            return if !check_running($vmid);

            create_reboot_request($vmid);

            my $storecfg = PVE::Storage::config();
            _do_vm_stop($storecfg, $vmid, undef, undef, $timeout, 1);
        };
        if (my $err = $@) {
            # avoid that the next normal shutdown will be confused for a reboot
            clear_reboot_request($vmid);
            die $err;
        }
    };

    PVE::QemuConfig->lock_config($vmid, $reboot_locked);
}
5953
# Suspend a VM. Without $includestate only the monitor 'stop' command is sent.
# With $includestate the VM state is written to a newly allocated state volume
# and the QEMU process is asked to quit afterwards; the config lock changes
# from 'suspending' to 'suspended' on success.
#
# note: if using the statestorage parameter, the caller has to check privileges
sub vm_suspend {
    my ($vmid, $skiplock, $includestate, $statestorage) = @_;

    # filled in by the locked preparation step, used again when saving the state
    my ($conf, $path, $storecfg, $vmstate);

    PVE::QemuConfig->lock_config($vmid, sub {
        $conf = PVE::QemuConfig->load_config($vmid);

        my $is_backing_up = PVE::QemuConfig->has_lock($conf, 'backup');
        if (!$skiplock && !$is_backing_up) {
            PVE::QemuConfig->check_lock($conf);
        }

        die "cannot suspend to disk during backup\n"
            if $is_backing_up && $includestate;

        if ($includestate) {
            $conf->{lock} = 'suspending';
            my $date = strftime("%Y-%m-%d", localtime(time()));
            $storecfg = PVE::Storage::config();

            if (!$statestorage) {
                $statestorage = find_vmstate_storage($conf, $storecfg);

                # check permissions for the storage
                my $rpcenv = PVE::RPCEnvironment::get();
                if ($rpcenv->{type} ne 'cli') {
                    my $authuser = $rpcenv->get_user();
                    $rpcenv->check($authuser, "/storage/$statestorage", ['Datastore.AllocateSpace']);
                }
            }

            $vmstate = PVE::QemuConfig->__snapshot_save_vmstate(
                $vmid, $conf, "suspend-$date", $storecfg, $statestorage, 1);
            $path = PVE::Storage::path($storecfg, $vmstate);
            PVE::QemuConfig->write_config($vmid, $conf);
        } else {
            mon_cmd($vmid, "stop");
        }
    });

    if ($includestate) {
        # save vm state
        PVE::Storage::activate_volumes($storecfg, [$vmstate]);

        eval {
            set_migration_caps($vmid, 1);
            mon_cmd($vmid, "savevm-start", statefile => $path);

            # poll until the state save has either completed or failed
            while (1) {
                my $state = mon_cmd($vmid, "query-savevm");
                my $status = $state->{status};

                die "savevm not active\n" if !$status;

                if ($status eq 'active') {
                    sleep(1);
                    next;
                }
                if ($status eq 'completed') {
                    print "State saved, quitting\n";
                    last;
                }

                die "query-savevm failed with error '$state->{error}'\n"
                    if $status eq 'failed' && $state->{error};
                die "query-savevm returned status '$state->{status}'\n";
            }
        };
        my $err = $@;

        PVE::QemuConfig->lock_config($vmid, sub {
            $conf = PVE::QemuConfig->load_config($vmid);

            if ($err) {
                # cleanup, but leave suspending lock, to indicate something went wrong
                eval {
                    mon_cmd($vmid, "savevm-end");
                    PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
                    PVE::Storage::vdisk_free($storecfg, $vmstate);
                    delete $conf->@{qw(vmstate runningmachine runningcpu)};
                    PVE::QemuConfig->write_config($vmid, $conf);
                };
                warn $@ if $@;
                die $err;
            }

            die "lock changed unexpectedly\n"
                if !PVE::QemuConfig->has_lock($conf, 'suspending');

            mon_cmd($vmid, "quit");
            $conf->{lock} = 'suspended';
            PVE::QemuConfig->write_config($vmid, $conf);
        });
    }
}
6048
# Resume a VM according to the status reported by 'query-status':
# nothing is done if it is already 'running', 'system_wakeup' is used for a
# 'suspended' guest, and a 'shutdown' guest is reset before 'cont' is sent.
# Unless $nocheck is set, the config lock is verified first (a 'backup' lock
# or $skiplock bypasses that check).
sub vm_resume {
    my ($vmid, $skiplock, $nocheck) = @_;

    PVE::QemuConfig->lock_config($vmid, sub {
        my $res = mon_cmd($vmid, 'query-status');
        my $status = $res->{status} || '';

        return if $status eq 'running'; # job done, go home

        my $resume_cmd = $status eq 'suspended' ? 'system_wakeup' : 'cont';
        my $needs_reset = $status eq 'shutdown';

        if (!$nocheck) {
            my $conf = PVE::QemuConfig->load_config($vmid);

            if (!$skiplock && !PVE::QemuConfig->has_lock($conf, 'backup')) {
                PVE::QemuConfig->check_lock($conf);
            }
        }

        # required if a VM shuts down during a backup and we get a resume
        # request before the backup finishes for example
        mon_cmd($vmid, "system_reset") if $needs_reset;

        mon_cmd($vmid, $resume_cmd);
    });
}
6079
# Send a key (or key combination) to the guest via the human monitor
# 'sendkey' command. Dies with the monitor output if that output is not empty.
# $skiplock is accepted but not used by this function.
sub vm_sendkey {
    my ($vmid, $skiplock, $key) = @_;

    my $send_locked = sub {
        # the loaded config is not used any further here; presumably the call
        # is kept so that a missing config aborts early - TODO confirm
        my $conf = PVE::QemuConfig->load_config($vmid);

        # there is no qmp command, so we use the human monitor command
        my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "sendkey $key");
        die $output if $output ne '';
    };

    PVE::QemuConfig->lock_config($vmid, $send_locked);
}
6092
# vzdump restore implementation
6094
# Return the name of the first entry listed by 'tar tf' for $archive.
# Dies if $archive is not a plain file, if tar cannot be started, or if the
# listing yields no data.
sub tar_archive_read_firstfile {
    my ($archive) = @_;

    if (!-f $archive) {
        die "ERROR: file '$archive' does not exist\n";
    }

    # try to detect archive type first
    my $tar_pid = open(my $listing, '-|', 'tar', 'tf', $archive)
        or die "unable to open file '$archive'\n";

    # only the first line is of interest, so tar is terminated right away
    my $firstfile = readline($listing);
    kill(15, $tar_pid);
    close($listing);

    die "ERROR: archive contaions no data\n" if !$firstfile;
    chomp($firstfile);

    return $firstfile;
}
6112
# Remove the temporary volumes recorded in $statfile after a tar restore.
#
# Every line of $statfile is expected to contain 'vzdump:<name>:<volid>'.
# A volid starting with '/' is treated as a plain file and unlinked, anything
# else is freed through PVE::Storage::vdisk_free(). Failures for a single
# volume are reported on STDERR and do not abort the cleanup. Nothing happens
# (apart from the start message) if $statfile cannot be opened.
sub tar_restore_cleanup {
    my ($storecfg, $statfile) = @_;

    print STDERR "starting cleanup\n";

    my $fd = IO::File->new($statfile, "r");
    return if !$fd;

    while (defined(my $line = <$fd>)) {
        if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
            my $volid = $2;
            eval {
                if ($volid =~ m|^/|) {
                    # parenthesised call plus low-precedence 'or', so that a
                    # failed unlink really raises the error
                    unlink($volid) or die "unlink failed - $!\n";
                } else {
                    PVE::Storage::vdisk_free($storecfg, $volid);
                }
                print STDERR "temporary volume '$volid' sucessfuly removed\n";
            };
            print STDERR "unable to cleanup '$volid' - $@" if $@;
        } else {
            print STDERR "unable to parse line in statfile - $line";
        }
    }
    $fd->close();

    return;
}
6138
# Dispatch a restore to the matching archive handler.
#
# An $archive of '-' is handed to restore_vma_archive() directly. Otherwise
# the format comes from $opts->{format} or, if that is not defined, from
# PVE::Storage::archive_info(); 'tar' goes to restore_tar_archive() and
# everything else to restore_vma_archive() together with the compression
# reported by archive_info().
sub restore_file_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if ($archive eq '-') {
        return restore_vma_archive($archive, $vmid, $user, $opts);
    }

    my $info = PVE::Storage::archive_info($archive);
    my $format = $opts->{format} // $info->{format};
    my $comp = $info->{compression};

    # try to detect archive format
    return restore_tar_archive($archive, $vmid, $user, $opts)
        if $format eq 'tar';

    return restore_vma_archive($archive, $vmid, $user, $opts, $comp);
}
6156
# Helper to remove disks that will not be used after restore.
#
# $storecfg: Storage configuration
# $vmid: ID of the VM being restored
# $oldconf: the previous VM configuration
# $virtdev_hash: devices that are part of the restore, keyed by config key
my $restore_cleanup_oldconf = sub {
    my ($storecfg, $vmid, $oldconf, $virtdev_hash) = @_;

    PVE::QemuConfig->foreach_volume($oldconf, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive, 1);

        # skip empty entries and absolute paths
        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;

        # only touch volumes that are owned by this VM
        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        # Note: only delete disk we want to restore
        # other volumes will become unused
        if ($virtdev_hash->{$ds}) {
            eval { PVE::Storage::vdisk_free($storecfg, $volid); };
            if (my $err = $@) {
                warn $err;
            }
        }
    });

    # delete vmstate files, after the restore we have no snapshots anymore
    foreach my $snap (values %{$oldconf->{snapshots}}) {
        next if !$snap->{vmstate};

        eval { PVE::Storage::vdisk_free($storecfg, $snap->{vmstate}); };
        if (my $err = $@) {
            warn $err;
        }
    }
};
6193
# Helper to parse vzdump backup device hints
#
# $rpcenv: Environment, used to check storage permissions
# $user: User ID, to check storage permissions
# $storecfg: Storage configuration
# $fh: the file handle for reading the configuration
# $devinfo: should contain device sizes for all backed-up devices
# $options: backup options (pool, default storage)
#
# Return: $virtdev_hash, updates $devinfo (add devname, virtdev, format, storeid)
my $parse_backup_hints = sub {
    my ($rpcenv, $user, $storecfg, $fh, $devinfo, $options) = @_;

    my $virtdev_hash = {};

    while (defined(my $line = <$fh>)) {
        if (my ($virtdev, $devname, $storeid, $format) =
            $line =~ m/^\#qmdump\#map:(\S+):(\S+):(\S*):(\S*):$/
        ) {
            my $entry = $devinfo->{$devname};
            die "archive does not contain data for drive '$virtdev'\n"
                if !$entry;

            # an explicitly requested storage wins, 'local' is the last resort
            $storeid = $options->{storage} if defined($options->{storage});
            $storeid ||= 'local';
            $format ||= 'raw';

            $entry->{devname} = $devname;
            $entry->{virtdev} = $virtdev;
            $entry->{format} = $format;
            $entry->{storeid} = $storeid;

            # check permission on storage
            my $pool = $options->{pool}; # todo: do we need that?
            if ($user ne 'root@pam') {
                $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace']);
            }

            $virtdev_hash->{$virtdev} = $entry;
        } elsif ($line =~ m/^((?:ide|sata|scsi)\d+):\s*(.*)\s*$/) {
            my $virtdev = $1;
            my $drive = parse_drive($virtdev, $2);
            next if !drive_is_cloudinit($drive);

            # cloud-init drives carry no backup data, only remember where to
            # re-create them
            my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
            $storeid = $options->{storage} if defined ($options->{storage});
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            my $format = qemu_img_format($scfg, $volname); # has 'raw' fallback

            $virtdev_hash->{$virtdev} = {
                format => $format,
                storeid => $storeid,
                size => PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE,
                is_cloudinit => 1,
            };
        }
    }

    return $virtdev_hash;
};
6254
# Helper to allocate and activate all volumes required for a restore
#
# $storecfg: Storage configuration
# $virtdev_hash: as returned by parse_backup_hints()
# $vmid: ID of the VM the volumes are allocated for
#
# Each entry of $virtdev_hash gets its 'volid' set, and its 'format' is
# replaced by the storage default if the storage does not support it.
#
# Returns: { $virtdev => $volid }
my $restore_allocate_devices = sub {
    my ($storecfg, $virtdev_hash, $vmid) = @_;

    my $map = {};
    foreach my $virtdev (sort keys %$virtdev_hash) {
        my $d = $virtdev_hash->{$virtdev};
        my $alloc_size = int(($d->{size} + 1024 - 1)/1024); # size / 1024, rounded up
        my $storeid = $d->{storeid};

        # looked up once per device and reused below for the cloud-init name
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        # test if requested format is supported
        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        my $supported = grep { $_ eq $d->{format} } @$validFormats;
        $d->{format} = $defFormat if !$supported;

        # only cloud-init volumes get an explicit name
        my $name;
        if ($d->{is_cloudinit}) {
            $name = "vm-$vmid-cloudinit";
            $name .= ".$d->{format}" if $scfg->{path};
        }

        my $volid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $vmid, $d->{format}, $name, $alloc_size);

        print STDERR "new volume ID is '$volid'\n";
        $d->{volid} = $volid;

        PVE::Storage::activate_volumes($storecfg, [$volid]);

        $map->{$virtdev} = $volid;
    }

    return $map;
};
6298
# Translate one line of a backed-up VM configuration into the text that goes
# into the restored configuration.
#
# $cookie: state shared between calls; 'netcount' numbers converted net devices
# $map:    { $virtdev => $volid } of the newly allocated volumes
# $line:   the configuration line, including its trailing newline
# $unique: if true, MAC addresses and the SMBIOS UUID are regenerated
#
# Returns the replacement text, which is the empty string for dropped lines.
sub restore_update_config_line {
    my ($cookie, $map, $line, $unique) = @_;

    # backup hints, locks, unused volumes and snapshot parents are dropped
    my @dropped = (
        qr/^\#qmdump\#/,
        qr/^\#vzdump\#/,
        qr/^lock:/,
        qr/^unused\d+:/,
        qr/^parent:/,
    );
    for my $pattern (@dropped) {
        return '' if $line =~ $pattern;
    }

    my $res = '';

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
    if (($line =~ m/^(vlan(\d+)):\s*(\S+)\s*$/)) {
        # try to convert old 1.X settings
        my ($ind, $ethcfg) = ($2, $3);
        foreach my $devconfig (PVE::Tools::split_list($ethcfg)) {
            my ($model, $macaddr) = split(/\=/, $devconfig);
            $macaddr = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if !$macaddr || $unique;

            my $netstr = print_net({
                model => $model,
                bridge => "vmbr$ind",
                macaddr => $macaddr,
            });

            $res .= "net$cookie->{netcount}: $netstr\n";
            $cookie->{netcount}++;
        }
    } elsif (($line =~ m/^(net\d+):\s*(\S+)\s*$/) && $unique) {
        my ($id, $netstr) = ($1, $2);
        my $net = parse_net($netstr);
        # only replace a MAC address that was actually configured
        $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if $net->{macaddr};
        $netstr = print_net($net);
        $res .= "$id: $netstr\n";
    } elsif ($line =~ m/^((ide|scsi|virtio|sata|efidisk|tpmstate)\d+):\s*(\S+)\s*$/) {
        my ($virtdev, $value) = ($1, $3);
        my $di = parse_drive($virtdev, $value);

        if (defined($di->{backup}) && !$di->{backup}) {
            # drives explicitly excluded from backup are commented out
            $res .= "#$line";
        } elsif (my $new_volid = $map->{$virtdev}) {
            delete $di->{format}; # format can change on restore
            $di->{file} = $new_volid;
            $value = print_drive($di);
            $res .= "$virtdev: $value\n";
        } else {
            $res .= $line;
        }
    } elsif (($line =~ m/^vmgenid: (.*)/)) {
        my $vmgenid = $1;
        # always generate a new vmgenid if there was a valid one setup
        $vmgenid = generate_uuid() if $vmgenid ne '0';
        $res .= "vmgenid: $vmgenid\n";
    } elsif (($line =~ m/^(smbios1: )(.*)/) && $unique) {
        my ($key_prefix, $smbios1_str) = ($1, $2);

        my ($uuid, $uuid_str);
        UUID::generate($uuid);
        UUID::unparse($uuid, $uuid_str);

        my $smbios1 = parse_smbios1($smbios1_str);
        $smbios1->{uuid} = $uuid_str;
        $res .= $key_prefix.print_smbios1($smbios1)."\n";
    } else {
        $res .= $line;
    }

    return $res;
}
6367
# Deactivate all volumes recorded (as 'volid') in the entries of $devinfo.
#
# $storecfg: Storage configuration
# $devinfo: per-device info hash; entries without a 'volid' are ignored
my $restore_deactivate_volumes = sub {
    my ($storecfg, $devinfo) = @_;

    my @allocated = grep { $_ } map { $devinfo->{$_}->{volid} } keys %$devinfo;

    PVE::Storage::deactivate_volumes($storecfg, [@allocated]);
};
6379
# Remove all volumes recorded (as 'volid') in the entries of $devinfo.
#
# $storecfg: Storage configuration
# $devinfo: per-device info hash; entries without a 'volid' are ignored
#
# A volid starting with '/' is unlinked as a plain file, anything else is
# freed through PVE::Storage::vdisk_free(). A failure for one volume is
# reported on STDERR and does not stop the removal of the remaining ones.
my $restore_destroy_volumes = sub {
    my ($storecfg, $devinfo) = @_;

    foreach my $devname (keys %$devinfo) {
        my $volid = $devinfo->{$devname}->{volid};
        next if !$volid;
        eval {
            if ($volid =~ m|^/|) {
                # parenthesised call plus low-precedence 'or', so that a
                # failed unlink really raises the error
                unlink($volid) or die "unlink failed - $!\n";
            } else {
                PVE::Storage::vdisk_free($storecfg, $volid);
            }
            print STDERR "temporary volume '$volid' sucessfuly removed\n";
        };
        print STDERR "unable to cleanup '$volid' - $@" if $@;
    }
};
6397
# Collect the 'images' volumes that PVE::Storage::vdisk_list() reports for
# $vmid, over all storages.
#
# Returns a hash keyed by volume ID. Each value is the list entry itself,
# extended by a 'path' key. Entries lacking a volid or a size are skipped.
sub scan_volids {
    my ($cfg, $vmid) = @_;

    my $info = PVE::Storage::vdisk_list($cfg, undef, $vmid, undef, 'images');

    my $volid_hash = {};
    for my $storeid (keys %$info) {
        for my $item (@{$info->{$storeid}}) {
            my $volid = $item->{volid};
            next if !$volid || !$item->{size};

            my $path = PVE::Storage::path($cfg, $volid);
            $item->{path} = $path;
            $volid_hash->{$volid} = $item;
        }
    }

    return $volid_hash;
}
6414
# Bring the disk entries of $conf in line with the volumes found on storage.
#
# $vmid: VM ID, used for messages and to recognise state volumes
# $conf: VM configuration, modified in place
# $volid_hash: { $volid => { size => ..., path => ... } } of existing volumes
#
# Updates disk sizes, drops 'unusedX' entries whose volume is referenced
# elsewhere and adds unreferenced volumes as new unused entries.
#
# Returns a true value if $conf was changed, undef otherwise.
sub update_disk_config {
    my ($vmid, $conf, $volid_hash) = @_;

    my $changes;
    my $prefix = "VM $vmid";

    # Note: it is allowed to define multiple storages with same path (alias), so
    # we need to check both 'volid' and real 'path' (two different volid can point
    # to the same path).
    my $seen_volid = {}; # used and unused disks
    my $seen_path = {};

    # update size info
    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;
        my $volume = $volid_hash->{$volid};

        # mark volid as "in-use" for next step
        $seen_volid->{$volid} = 1;
        if ($volume && (my $path = $volume->{path})) {
            $seen_path->{$path} = 1;
        }

        return if drive_is_cdrom($drive) || !$volume;

        my ($updated, $msg) = PVE::QemuServer::Drive::update_disksize($drive, $volume->{size});
        if (defined($updated)) {
            $changes = 1;
            $conf->{$opt} = print_drive($updated);
            print "$prefix ($opt): $msg\n";
        }
    });

    # remove 'unusedX' entry if volume is used
    PVE::QemuConfig->foreach_unused_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $path;
        $path = $volid_hash->{$volid}->{path} if $volid_hash->{$volid};

        my $in_use = $seen_volid->{$volid} || ($path && $seen_path->{$path});
        if ($in_use) {
            print "$prefix remove entry '$opt', its volume '$volid' is in use\n";
            $changes = 1;
            delete $conf->{$opt};
        }

        $seen_volid->{$volid} = 1;
        $seen_path->{$path} = 1 if $path;
    });

    # everything that is still unreferenced gets added as unused volume
    for my $volid (sort keys %$volid_hash) {
        next if $volid =~ m/vm-$vmid-state-/;
        next if $seen_volid->{$volid};

        my $path = $volid_hash->{$volid}->{path};
        next if !$path; # just to be sure
        next if $seen_path->{$path};

        $changes = 1;
        my $key = PVE::QemuConfig->add_unused_volume($conf, $volid);
        print "$prefix add unreferenced volume '$volid' as '$key' to config\n";
        $seen_path->{$path} = 1; # avoid to add more than once (aliases)
    }

    return $changes;
}
6488
# Rescan the storages and update the disk entries of one or all VM configs.
#
# $vmid:   VM to update; if undefined, every VM from config_list() is handled
# $nolock: if true, the config is updated without taking the config lock
# $dryrun: if true, changes are computed and printed but not written
sub rescan {
    my ($vmid, $nolock, $dryrun) = @_;

    my $cfg = PVE::Storage::config();

    print "rescan volumes...\n";
    my $volid_hash = scan_volids($cfg, $vmid);

    my $updatefn = sub {
        my ($vmid) = @_;

        my $conf = PVE::QemuConfig->load_config($vmid);

        PVE::QemuConfig->check_lock($conf);

        # restrict the scan result to volumes that belong to this VM
        my $vm_volids = {};
        for my $volid (keys %$volid_hash) {
            my $info = $volid_hash->{$volid};
            next if !($info->{vmid} && $info->{vmid} == $vmid);
            $vm_volids->{$volid} = $info;
        }

        my $changes = update_disk_config($vmid, $conf, $vm_volids);

        PVE::QemuConfig->write_config($vmid, $conf) if $changes && !$dryrun;
    };

    # run the update for a single VM, with or without the config lock
    my $update_vm = sub {
        my ($id) = @_;

        return $updatefn->($id) if $nolock;
        return PVE::QemuConfig->lock_config($id, $updatefn, $id);
    };

    if (defined($vmid)) {
        $update_vm->($vmid);
    } else {
        my $vmlist = config_list();
        for my $id (keys %$vmlist) {
            $update_vm->($id);
        }
    }
}
6532
# Restore a VM from a Proxmox Backup Server snapshot.
#
# $archive: volume ID of the backup; must parse as vtype 'backup' with
#           format 'pbs-vm'
# $vmid:    ID of the VM to restore into
# $user:    user ID, used for the storage permission checks
# $options: restore options; 'live', 'unique' and 'pool' are read here, the
#           hash is also passed on to the backup hint parser
#
# Dies on failure; volumes allocated until then are destroyed again.
sub restore_proxmox_backup_archive {
    my ($archive, $vmid, $user, $options) = @_;

    my $storecfg = PVE::Storage::config();

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($archive);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

    my $fingerprint = $scfg->{fingerprint};
    my $keyfile = PVE::Storage::PBSPlugin::pbs_encryption_key_file_name($storecfg, $storeid);

    my $repo = PVE::PBSClient::get_repository($scfg);

    # This is only used for `pbs-restore` and the QEMU PBS driver (live-restore)
    my $password = PVE::Storage::PBSPlugin::pbs_get_password($scfg, $storeid);
    local $ENV{PBS_PASSWORD} = $password;
    local $ENV{PBS_FINGERPRINT} = $fingerprint if defined($fingerprint);

    my ($vtype, $pbs_backup_name, undef, undef, undef, undef, $format) =
        PVE::Storage::parse_volname($storecfg, $archive);

    die "got unexpected vtype '$vtype'\n" if $vtype ne 'backup';

    die "got unexpected backup format '$format'\n" if $format ne 'pbs-vm';

    # start from an empty scratch directory
    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;
    mkpath $tmpdir;

    my $conffile = PVE::QemuConfig->config_file($vmid);

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    # Note: $oldconf is undef if VM does not exists
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my $rpcenv = PVE::RPCEnvironment::get();
    my $devinfo = {};

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        my $cfgfn = "$tmpdir/qemu-server.conf";
        my $firewall_config_fn = "$tmpdir/fw.conf";
        my $index_fn = "$tmpdir/index.json";

        # fetch a single file of the snapshot into the scratch directory
        my $fetch_file = sub {
            my ($name, $target) = @_;
            my $param = [$pbs_backup_name, $name, $target];
            PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, "restore", $param);
        };

        $fetch_file->("index.json", $index_fn);
        my $index = PVE::Tools::file_get_contents($index_fn);
        $index = decode_json($index);

        # collect the sizes of all disk images listed in the index
        foreach my $info (@{$index->{files}}) {
            if ($info->{filename} =~ m/^(drive-\S+).img.fidx$/) {
                my $devname = $1;
                if ($info->{size} =~ m/^(\d+)$/) { # untaint size
                    $devinfo->{$devname}->{size} = $1;
                } else {
                    die "unable to parse file size in 'index.json' - got '$info->{size}'\n";
                }
            }
        }

        # number of index entries carrying the given file name
        my $count_files = sub {
            my ($name) = @_;
            return scalar(grep { $_->{filename} eq $name } @{$index->{files}});
        };

        my $is_qemu_server_backup = $count_files->('qemu-server.conf.blob');
        if (!$is_qemu_server_backup) {
            die "backup does not look like a qemu-server backup (missing 'qemu-server.conf' file)\n";
        }
        my $has_firewall_config = $count_files->('fw.conf.blob');

        $fetch_file->("qemu-server.conf", $cfgfn);

        if ($has_firewall_config) {
            $fetch_file->("fw.conf", $firewall_config_fn);

            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($firewall_config_fn, "${pve_firewall_dir}/$vmid.fw");
        }

        my $fh = IO::File->new($cfgfn, "r")
            or die "unable to read qemu-server.conf - $!\n";

        my $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $storecfg, $fh, $devinfo, $options);

        # fixme: rate limit?

        # create empty/temp config
        PVE::Tools::file_set_contents($conffile, "memory: 128\nlock: create");

        $restore_cleanup_oldconf->($storecfg, $vmid, $oldconf, $virtdev_hash) if $oldconf;

        # allocate volumes
        my $map = $restore_allocate_devices->($storecfg, $virtdev_hash, $vmid);

        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            # this fails if storage is unavailable
            my $volid = $d->{volid};
            my $path = PVE::Storage::path($storecfg, $volid);

            # for live-restore we only want to preload the efidisk and TPM state
            next if $options->{live} && $virtdev ne 'efidisk0' && $virtdev ne 'tpmstate0';

            my $pbs_restore_cmd = [
                '/usr/bin/pbs-restore',
                '--repository', $repo,
                $pbs_backup_name,
                "$d->{devname}.img.fidx",
                $path,
                '--verbose',
            ];

            push @$pbs_restore_cmd, '--format', $d->{format} if $d->{format};
            push @$pbs_restore_cmd, '--keyfile', $keyfile if -e $keyfile;

            if (PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $volid)) {
                push @$pbs_restore_cmd, '--skip-zero';
            }

            my $dbg_cmdstring = PVE::Tools::cmd2string($pbs_restore_cmd);
            print "restore proxmox backup image: $dbg_cmdstring\n";
            run_command($pbs_restore_cmd);
        }

        # second pass over the config: build the new configuration text
        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $options->{unique},
            );
        }

        $fh->close();
    };
    my $err = $@;

    # a successful live-restore keeps its volumes active
    if ($err || !$options->{live}) {
        $restore_deactivate_volumes->($storecfg, $devinfo);
    }

    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($storecfg, $devinfo);
        die $err;
    }

    if ($options->{live}) {
        # keep lock during live-restore
        $new_conf_raw .= "\nlock: create";
    }

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $options->{pool}) if $options->{pool};

    if ($options->{live}) {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "got signal ($!) - abort\n"; };

        my $conf = PVE::QemuConfig->load_config($vmid);
        die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf);

        # these special drives are already restored before start
        delete $devinfo->{'drive-efidisk0'};
        delete $devinfo->{'drive-tpmstate0-backup'};
        pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $repo, $keyfile, $pbs_backup_name);

        PVE::QemuConfig->remove_lock($vmid, "create");
    }
}
6735
# Start a VM on top of a PBS snapshot and stream the disk contents in while
# the guest is already running.
#
# $vmid, $conf:    the VM and its (already restored) configuration
# $storecfg:       Storage configuration
# $restored_disks: hash keyed by device name ('drive-<confname>') of the
#                  disks to stream
# $repo, $snap:    PBS repository and snapshot name
# $keyfile:        encryption key file, only used if it exists
#
# On any error the VM is stopped and the function dies with
# "live-restore failed".
sub pbs_live_restore {
    my ($vmid, $conf, $storecfg, $restored_disks, $repo, $keyfile, $snap) = @_;

    print "starting VM for live-restore\n";
    print "repository: '$repo', snapshot: '$snap'\n";

    # describe the PBS backing image for every disk, keyed by config name
    my $pbs_backing = {};
    for my $ds (keys %$restored_disks) {
        $ds =~ m/^drive-(.*)$/;
        my $confname = $1;

        my $backing = {
            repository => $repo,
            snapshot => $snap,
            archive => "$ds.img.fidx",
        };
        $backing->{keyfile} = $keyfile if -e $keyfile;
        $pbs_backing->{$confname} = $backing;

        my $drive = parse_drive($confname, $conf->{$confname});
        print "restoring '$ds' to '$drive->{file}'\n";
    }

    eval {
        # make sure HA doesn't interrupt our restore by stopping the VM
        if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
            run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
        }

        # start VM with backing chain pointing to PBS backup, environment vars for PBS driver
        # in QEMU (PBS_PASSWORD and PBS_FINGERPRINT) are already set by our caller
        my $start_params = { paused => 1, 'pbs-backing' => $pbs_backing };
        vm_start_nolock($storecfg, $vmid, $conf, $start_params, {});

        my $qmeventd_fd = register_qmeventd_handle($vmid);

        # begin streaming, i.e. data copy from PBS to target disk for every vol,
        # this will effectively collapse the backing image chain consisting of
        # [target <- alloc-track -> PBS snapshot] to just [target] (alloc-track
        # removes itself once all backing images vanish with 'auto-remove=on')
        my @disks = sort keys %$restored_disks;
        my $jobs = {};
        for my $ds (@disks) {
            my $job_id = "restore-$ds";
            mon_cmd($vmid, 'block-stream',
                'job-id' => $job_id,
                device => "$ds",
            );
            $jobs->{$job_id} = {};
        }

        mon_cmd($vmid, 'cont');
        qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');

        print "restore-drive jobs finished successfully, removing all tracking block devices"
            ." to disconnect from Proxmox Backup Server\n";

        for my $ds (@disks) {
            mon_cmd($vmid, 'blockdev-del', 'node-name' => "$ds-pbs");
        }

        close($qmeventd_fd);
    };

    my $err = $@;

    if ($err) {
        warn "An error occured during live-restore: $err\n";
        _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
        die "live-restore failed\n";
    }
}
6805
6806sub restore_vma_archive {
6807 my ($archive, $vmid, $user, $opts, $comp) = @_;
6808
6809 my $readfrom = $archive;
6810
6811 my $cfg = PVE::Storage::config();
6812 my $commands = [];
6813 my $bwlimit = $opts->{bwlimit};
6814
6815 my $dbg_cmdstring = '';
6816 my $add_pipe = sub {
6817 my ($cmd) = @_;
6818 push @$commands, $cmd;
6819 $dbg_cmdstring .= ' | ' if length($dbg_cmdstring);
6820 $dbg_cmdstring .= PVE::Tools::cmd2string($cmd);
6821 $readfrom = '-';
6822 };
6823
6824 my $input = undef;
6825 if ($archive eq '-') {
6826 $input = '<&STDIN';
6827 } else {
6828 # If we use a backup from a PVE defined storage we also consider that
6829 # storage's rate limit:
6830 my (undef, $volid) = PVE::Storage::path_to_volume_id($cfg, $archive);
6831 if (defined($volid)) {
6832 my ($sid, undef) = PVE::Storage::parse_volume_id($volid);
6833 my $readlimit = PVE::Storage::get_bandwidth_limit('restore', [$sid], $bwlimit);
6834 if ($readlimit) {
6835 print STDERR "applying read rate limit: $readlimit\n";
6836 my $cstream = ['cstream', '-t', $readlimit*1024, '--', $readfrom];
6837 $add_pipe->($cstream);
6838 }
6839 }
6840 }
6841
6842 if ($comp) {
6843 my $info = PVE::Storage::decompressor_info('vma', $comp);
6844 my $cmd = $info->{decompressor};
6845 push @$cmd, $readfrom;
6846 $add_pipe->($cmd);
6847 }
6848
6849 my $tmpdir = "/var/tmp/vzdumptmp$$";
6850 rmtree $tmpdir;
6851
6852 # disable interrupts (always do cleanups)
6853 local $SIG{INT} =
6854 local $SIG{TERM} =
6855 local $SIG{QUIT} =
6856 local $SIG{HUP} = sub { warn "got interrupt - ignored\n"; };
6857
6858 my $mapfifo = "/var/tmp/vzdumptmp$$.fifo";
6859 POSIX::mkfifo($mapfifo, 0600);
6860 my $fifofh;
6861 my $openfifo = sub { open($fifofh, '>', $mapfifo) or die $! };
6862
6863 $add_pipe->(['vma', 'extract', '-v', '-r', $mapfifo, $readfrom, $tmpdir]);
6864
6865 my $oldtimeout;
6866 my $timeout = 5;
6867
6868 my $devinfo = {};
6869
6870 my $rpcenv = PVE::RPCEnvironment::get();
6871
6872 my $conffile = PVE::QemuConfig->config_file($vmid);
6873
6874 # Note: $oldconf is undef if VM does not exist
6875 my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
6876 my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
6877 my $new_conf_raw = '';
6878
6879 my %storage_limits;
6880
6881 my $print_devmap = sub {
6882 my $cfgfn = "$tmpdir/qemu-server.conf";
6883
6884 # we can read the config - that is already extracted
6885 my $fh = IO::File->new($cfgfn, "r") ||
6886 die "unable to read qemu-server.conf - $!\n";
6887
6888 my $fwcfgfn = "$tmpdir/qemu-server.fw";
6889 if (-f $fwcfgfn) {
6890 my $pve_firewall_dir = '/etc/pve/firewall';
6891 mkdir $pve_firewall_dir; # make sure the dir exists
6892 PVE::Tools::file_copy($fwcfgfn, "${pve_firewall_dir}/$vmid.fw");
6893 }
6894
6895 my $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $cfg, $fh, $devinfo, $opts);
6896
6897 foreach my $info (values %{$virtdev_hash}) {
6898 my $storeid = $info->{storeid};
6899 next if defined($storage_limits{$storeid});
6900
6901 my $limit = PVE::Storage::get_bandwidth_limit('restore', [$storeid], $bwlimit) // 0;
6902 print STDERR "rate limit for storage $storeid: $limit KiB/s\n" if $limit;
6903 $storage_limits{$storeid} = $limit * 1024;
6904 }
6905
6906 foreach my $devname (keys %$devinfo) {
6907 die "found no device mapping information for device '$devname'\n"
6908 if !$devinfo->{$devname}->{virtdev};
6909 }
6910
6911 # create empty/temp config
6912 if ($oldconf) {
6913 PVE::Tools::file_set_contents($conffile, "memory: 128\n");
6914 $restore_cleanup_oldconf->($cfg, $vmid, $oldconf, $virtdev_hash);
6915 }
6916
6917 # allocate volumes
6918 my $map = $restore_allocate_devices->($cfg, $virtdev_hash, $vmid);
6919
6920 # print restore information to $fifofh
6921 foreach my $virtdev (sort keys %$virtdev_hash) {
6922 my $d = $virtdev_hash->{$virtdev};
6923 next if $d->{is_cloudinit}; # no need to restore cloudinit
6924
6925 my $storeid = $d->{storeid};
6926 my $volid = $d->{volid};
6927
6928 my $map_opts = '';
6929 if (my $limit = $storage_limits{$storeid}) {
6930 $map_opts .= "throttling.bps=$limit:throttling.group=$storeid:";
6931 }
6932
6933 my $write_zeros = 1;
6934 if (PVE::Storage::volume_has_feature($cfg, 'sparseinit', $volid)) {
6935 $write_zeros = 0;
6936 }
6937
6938 my $path = PVE::Storage::path($cfg, $volid);
6939
6940 print $fifofh "${map_opts}format=$d->{format}:${write_zeros}:$d->{devname}=$path\n";
6941
6942 print "map '$d->{devname}' to '$path' (write zeros = ${write_zeros})\n";
6943 }
6944
6945 $fh->seek(0, 0) || die "seek failed - $!\n";
6946
6947 my $cookie = { netcount => 0 };
6948 while (defined(my $line = <$fh>)) {
6949 $new_conf_raw .= restore_update_config_line(
6950 $cookie,
6951 $map,
6952 $line,
6953 $opts->{unique},
6954 );
6955 }
6956
6957 $fh->close();
6958 };
6959
6960 eval {
6961 # enable interrupts
6962 local $SIG{INT} =
6963 local $SIG{TERM} =
6964 local $SIG{QUIT} =
6965 local $SIG{HUP} =
6966 local $SIG{PIPE} = sub { die "interrupted by signal\n"; };
6967 local $SIG{ALRM} = sub { die "got timeout\n"; };
6968
6969 $oldtimeout = alarm($timeout);
6970
6971 my $parser = sub {
6972 my $line = shift;
6973
6974 print "$line\n";
6975
6976 if ($line =~ m/^DEV:\sdev_id=(\d+)\ssize:\s(\d+)\sdevname:\s(\S+)$/) {
6977 my ($dev_id, $size, $devname) = ($1, $2, $3);
6978 $devinfo->{$devname} = { size => $size, dev_id => $dev_id };
6979 } elsif ($line =~ m/^CTIME: /) {
6980 # we correctly received the vma config, so we can disable
6981 # the timeout now for disk allocation (set to 10 minutes, so
6982 # that we always timeout if something goes wrong)
6983 alarm(600);
6984 &$print_devmap();
6985 print $fifofh "done\n";
6986 my $tmp = $oldtimeout || 0;
6987 $oldtimeout = undef;
6988 alarm($tmp);
6989 close($fifofh);
6990 $fifofh = undef;
6991 }
6992 };
6993
6994 print "restore vma archive: $dbg_cmdstring\n";
6995 run_command($commands, input => $input, outfunc => $parser, afterfork => $openfifo);
6996 };
6997 my $err = $@;
6998
6999 alarm($oldtimeout) if $oldtimeout;
7000
7001 $restore_deactivate_volumes->($cfg, $devinfo);
7002
7003 close($fifofh) if $fifofh;
7004 unlink $mapfifo;
7005 rmtree $tmpdir;
7006
7007 if ($err) {
7008 $restore_destroy_volumes->($cfg, $devinfo);
7009 die $err;
7010 }
7011
7012 PVE::Tools::file_set_contents($conffile, $new_conf_raw);
7013
7014 PVE::Cluster::cfs_update(); # make sure we read new file
7015
7016 eval { rescan($vmid, 1); };
7017 warn $@ if $@;
7018
7019 PVE::AccessControl::add_vm_to_pool($vmid, $opts->{pool}) if $opts->{pool};
7020}
7021
# Restore a VM from a tar based vzdump archive.
#
# Parameters:
#   $archive - path of the archive, or '-' to read the archive from STDIN
#   $vmid    - ID of the VM to restore
#   $user    - exported to the extraction helper as VZDUMP_USER
#   $opts    - hash ref; the keys read here are storage, pool, prealloc, info and unique
#
# The archive is unpacked by piping every member through qmextract. Afterwards the extracted
# 'qemu-server.conf' is rewritten line by line via restore_update_config_line() and stored as the
# new VM config.
#
# Dies if the archive does not start with 'qemu-server.conf', if the extraction fails or if the
# extracted config cannot be opened.
sub restore_tar_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if ($archive ne '-') {
        my $firstfile = tar_archive_read_firstfile($archive);
        die "ERROR: file '$archive' does not look like a QemuServer vzdump backup\n"
            if $firstfile ne 'qemu-server.conf';
    }

    my $storecfg = PVE::Storage::config();

    # avoid zombie disks when restoring over an existing VM -> cleanup first
    # NOTE(review): the positional '1' is presumably keep_empty_config, which keeps the config
    # (and thus the VMID) reserved for us - confirm against destroy_vm()
    my $vmcfgfn = PVE::QemuConfig->config_file($vmid);
    destroy_vm($storecfg, $vmid, 1, { lock => 'restore' }) if -f $vmcfgfn;

    my $tocmd = "/usr/lib/qemu-server/qmextract";

    $tocmd .= " --storage " . PVE::Tools::shellquote($opts->{storage}) if $opts->{storage};
    $tocmd .= " --pool " . PVE::Tools::shellquote($opts->{pool}) if $opts->{pool};
    $tocmd .= ' --prealloc' if $opts->{prealloc};
    $tocmd .= ' --info' if $opts->{info};

    # tar option "xf" does not autodetect compression when read from STDIN,
    # so we pipe to zcat
    my $cmd = "zcat -f|tar xf " . PVE::Tools::shellquote($archive) . " " .
        PVE::Tools::shellquote("--to-command=$tocmd");

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    mkpath $tmpdir;

    local $ENV{VZDUMP_TMPDIR} = $tmpdir;
    local $ENV{VZDUMP_VMID} = $vmid;
    local $ENV{VZDUMP_USER} = $user;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    my $new_conf_raw = '';

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        if ($archive eq '-') {
            print "extracting archive from STDIN\n";
            run_command($cmd, input => "<&STDIN");
        } else {
            print "extracting archive '$archive'\n";
            run_command($cmd);
        }

        # NOTE(review): this only leaves the eval block, so with $opts->{info} set the code after
        # the eval still runs and writes the (empty) $new_conf_raw - confirm this is intended
        return if $opts->{info};

        # read new mapping
        my $map = {};
        my $statfile = "$tmpdir/qmrestore.stat";
        if (my $fd = IO::File->new($statfile, "r")) {
            while (defined (my $line = <$fd>)) {
                if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                    $map->{$1} = $2 if $1;
                } else {
                    print STDERR "unable to parse line in statfile - $line\n";
                }
            }
            $fd->close();
        }

        my $confsrc = "$tmpdir/qemu-server.conf";

        my $srcfd = IO::File->new($confsrc, "r") || die "unable to open file '$confsrc'\n";

        my $cookie = { netcount => 0 };
        while (defined (my $line = <$srcfd>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $srcfd->close();
    };
    if (my $err = $@) {
        tar_restore_cleanup($storecfg, "$tmpdir/qmrestore.stat") if !$opts->{info};

        # the stat file is no longer needed at this point, so do not leave the scratch directory
        # behind; best effort only, the original error must be the one that propagates
        eval { rmtree $tmpdir; };

        die $err;
    }

    rmtree $tmpdir;

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;
}
7129
# Call $func->($storage_id) once for every storage that backs a non-CD-ROM volume of $conf.
# Each storage is visited only once, in sorted order of its ID.
sub foreach_storage_used_by_vm {
    my ($conf, $func) = @_;

    my %used_storage;

    my $collect_storage = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive);

        my ($sid, $volname) = PVE::Storage::parse_volume_id($drive->{file}, 1);
        $used_storage{$sid} = $sid if $sid;
    };
    PVE::QemuConfig->foreach_volume($conf, $collect_storage);

    for my $storage_id (sort keys %used_storage) {
        $func->($storage_id);
    }
}
7149
# storage types whose snapshots are done by QEMU (unless the storage sets 'krbd')
my $qemu_snap_storage = {
    rbd => 1,
};

# Tell whether snapshots of $volid have to be done with QEMU.
#
# Returns 1 if the storage type is listed in $qemu_snap_storage (and 'krbd' is not set) or if the
# volume ID ends in '.qcow2' or '.qed'; returns nothing otherwise, and always nothing for a
# device ID containing 'tpmstate0'. Dies if the storage of $volid is not configured.
sub do_snapshots_with_qemu {
    my ($storecfg, $volid, $deviceid) = @_;

    return if $deviceid =~ m/tpmstate0/;

    my $storage_name = PVE::Storage::parse_volume_id($volid);
    my $scfg = $storecfg->{ids}->{$storage_name};
    die "could not find storage '$storage_name'\n" if !defined($scfg);

    return 1 if $qemu_snap_storage->{$scfg->{type}} && !$scfg->{krbd};
    return 1 if $volid =~ m/\.(qcow2|qed)$/;

    return;
}
7172
# Ping the guest agent of $vmid ('guest-ping' with a timeout of 3).
# Returns 1 if the command succeeded and 0 otherwise; a failure is reported with warn() unless
# $nowarn is set.
sub qga_check_running {
    my ($vmid, $nowarn) = @_;

    eval { mon_cmd($vmid, "guest-ping", timeout => 3); };
    my $ping_error = $@;

    return 1 if !$ping_error;

    warn "Qemu Guest Agent is not running - $ping_error" if !$nowarn;
    return 0;
}
7183
# Turn the volumes of a VM into base volumes.
#
# Every non-CD-ROM volume (or only the drive named $disk, if given) whose storage reports the
# 'template' feature is passed to vdisk_create_base(). The returned volume ID is stored in $conf
# and the config is written after each converted volume.
sub template_create {
    my ($vmid, $conf, $disk) = @_;

    my $storecfg = PVE::Storage::config();

    my $convert_to_base = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive) || ($disk && $ds ne $disk);

        my $src_volid = $drive->{file};
        return if !PVE::Storage::volume_has_feature($storecfg, 'template', $src_volid);

        $drive->{file} = PVE::Storage::vdisk_create_base($storecfg, $src_volid);
        $conf->{$ds} = print_drive($drive);
        PVE::QemuConfig->write_config($vmid, $conf);
    };

    PVE::QemuConfig->foreach_volume($conf, $convert_to_base);
}
7204
# Translate an 'iscsi://<portal>/<target>/<lun>' path into the option string built below
# (file.driver=iscsi,...,driver=raw); qemu_img_convert() passes it to qemu-img together with
# --image-opts / --target-image-opts.
#
# Dies if $path does not have the expected iscsi:// layout.
sub convert_iscsi_path {
    my ($path) = @_;

    if ($path =~ m|^iscsi://([^/]+)/([^/]+)/(.+)$|) {
        my ($portal, $target, $lun) = ($1, $2, $3);

        my $initiator_name = get_initiator_name();

        return "file.driver=iscsi,file.transport=tcp,file.initiator-name=$initiator_name,".
            "file.portal=$portal,file.target=$target,file.lun=$lun,driver=raw";
    }

    # fixed spelling of the user visible message ('unkown' -> 'unknown')
    die "cannot convert iscsi path '$path', unknown format\n";
}
7221
# Copy a disk image with 'qemu-img convert'.
#
# Parameters:
#   $src_volid           - source volume ID, or the path of an existing plain file
#   $dst_volid           - destination volume ID; the target is not created by qemu-img ('-n')
#   $size                - only used to render the progress output
#   $snapname            - optional snapshot of the source to read from
#   $is_zero_initialized - if true (and the target is not iSCSI) the target is written through
#                          the 'zeroinit:' prefix
#
# Dies if the destination is not a volume ID, if the source is neither a volume ID nor an existing
# file, or if qemu-img fails ("copy failed: ...").
sub qemu_img_convert {
    my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized) = @_;

    my $storecfg = PVE::Storage::config();
    my ($src_storeid, $src_volname) = PVE::Storage::parse_volume_id($src_volid, 1);
    my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid, 1);

    # fixed wording of the user visible message ('form' -> 'for')
    die "destination '$dst_volid' is not a valid volid for qemu-img convert\n" if !$dst_storeid;

    my $cachemode;
    my $src_path;
    my $src_is_iscsi = 0;
    my $src_format;

    if ($src_storeid) {
        PVE::Storage::activate_volumes($storecfg, [$src_volid], $snapname);
        my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
        $src_format = qemu_img_format($src_scfg, $src_volname);
        $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
        $src_is_iscsi = ($src_path =~ m|^iscsi://|);
        $cachemode = 'none' if $src_scfg->{type} eq 'zfspool';
    } elsif (-f $src_volid) {
        # plain file: the format is taken from the file extension, if it is a known one
        $src_path = $src_volid;
        if ($src_path =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/) {
            $src_format = $1;
        }
    }

    die "source '$src_volid' is not a valid volid nor path for qemu-img convert\n" if !$src_path;

    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
    my $dst_format = qemu_img_format($dst_scfg, $dst_volname);
    my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
    my $dst_is_iscsi = ($dst_path =~ m|^iscsi://|);

    my $cmd = [];
    push @$cmd, '/usr/bin/qemu-img', 'convert', '-p', '-n';
    push @$cmd, '-l', "snapshot.name=$snapname"
        if $snapname && $src_format && $src_format eq "qcow2";
    push @$cmd, '-t', 'none' if $dst_scfg->{type} eq 'zfspool';
    push @$cmd, '-T', $cachemode if defined($cachemode);

    # iSCSI endpoints are described by an option string instead of a format plus path
    if ($src_is_iscsi) {
        push @$cmd, '--image-opts';
        $src_path = convert_iscsi_path($src_path);
    } elsif ($src_format) {
        push @$cmd, '-f', $src_format;
    }

    if ($dst_is_iscsi) {
        push @$cmd, '--target-image-opts';
        $dst_path = convert_iscsi_path($dst_path);
    } else {
        push @$cmd, '-O', $dst_format;
    }

    push @$cmd, $src_path;

    if (!$dst_is_iscsi && $is_zero_initialized) {
        push @$cmd, "zeroinit:$dst_path";
    } else {
        push @$cmd, $dst_path;
    }

    # turn the '(NN.NN/100%)' progress lines of qemu-img into a transferred/total message
    my $parser = sub {
        my $line = shift;
        if ($line =~ m/\((\S+)\/100\%\)/) {
            my $percent = $1;
            my $transferred = int($size * $percent / 100);
            my $total_h = render_bytes($size, 1);
            my $transferred_h = render_bytes($transferred, 1);

            print "transferred $transferred_h of $total_h ($percent%)\n";
        }
    };

    eval { run_command($cmd, timeout => undef, outfunc => $parser); };
    my $err = $@;
    die "copy failed: $err" if $err;
}
7303
# Determine the image format of $volname on the storage described by $scfg.
# If the storage has a 'path' and the volume name ends in one of the extensions matched by
# QEMU_FORMAT_RE, that extension is returned; in every other case the result is "raw".
sub qemu_img_format {
    my ($scfg, $volname) = @_;

    return "raw" if !$scfg->{path};

    return $1 if $volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/;

    return "raw";
}
7313
# Start a 'drive-mirror' block job for "drive-$drive" of VM $vmid and hand over to
# qemu_drive_mirror_monitor().
#
# $dst_volid is either an 'nbd:' target, which is used verbatim with format "nbd", or a volume ID
# that is resolved to a path (prefixed with 'zeroinit:' if $is_zero_initialized is set).
# With $src_bitmap the sync mode is 'incremental' instead of 'full'; $bwlimit is multiplied by
# 1024 and passed as 'speed'.
#
# If starting the job fails, all jobs in $jobs are cancelled and the sub dies with
# "mirroring error: ...".
sub qemu_drive_mirror {
    my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit, $src_bitmap) = @_;

    $jobs ||= {};

    my $device = "drive-$drive";
    $jobs->{$device} = {};

    my ($qemu_target, $format);
    if ($dst_volid =~ /^nbd:/) {
        ($qemu_target, $format) = ($dst_volid, "nbd");
    } else {
        my $storecfg = PVE::Storage::config();
        my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid);

        my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
        $format = qemu_img_format($dst_scfg, $dst_volname);

        my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
        $qemu_target = $dst_path;
        $qemu_target = "zeroinit:$dst_path" if $is_zero_initialized;
    }

    my $opts = {
        timeout => 10,
        device => $device,
        mode => "existing",
        sync => "full",
        target => $qemu_target,
    };
    $opts->{format} = $format if $format;

    if (defined($src_bitmap)) {
        $opts->{sync} = 'incremental';
        $opts->{bitmap} = $src_bitmap;
        print "drive mirror re-using dirty bitmap '$src_bitmap'\n";
    }

    if (!defined($bwlimit)) {
        print "drive mirror is starting for drive-$drive\n";
    } else {
        $opts->{speed} = $bwlimit * 1024;
        print "drive mirror is starting for drive-$drive with bandwidth limit: ${bwlimit} KB/s\n";
    }

    # if a job already runs for this device we get an error, catch it for cleanup
    eval { mon_cmd($vmid, "drive-mirror", %$opts); };
    if (my $start_error = $@) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@;
        die "mirroring error: $start_error\n";
    }

    qemu_drive_mirror_monitor($vmid, $vmiddst, $jobs, $completion, $qga);
}
7365
# Poll the block jobs listed in $jobs (keyed by device name) until they are ready or gone, print
# their progress and finish them according to $completion.
#
# $completion can be either
# 'complete': wait until all jobs are ready, block-job-complete them (default)
# 'cancel': wait until all jobs are ready, block-job-cancel them
# 'skip': wait until all jobs are ready, return with block jobs in ready state
# 'auto': wait until all jobs disappear, only use for jobs which complete automatically
#
# $op is the block job type to look at (default "mirror"). If $vmiddst is set and differs from
# $vmid, the ready jobs are cancelled instead of completed, while the guest filesystem is frozen
# (if $qga is set and the agent answers) or the VM is suspended.
#
# On any error all remaining jobs are cancelled and the sub dies with "block job ($op) error: ...".
sub qemu_drive_mirror_monitor {
    my ($vmid, $vmiddst, $jobs, $completion, $qga, $op) = @_;

    $completion //= 'complete';
    $op //= "mirror";

    eval {
        my $err_complete = 0;

        my $starttime = time();
        while (1) {
            die "block job ('$op') timed out\n" if $err_complete > 300;

            my $stats = mon_cmd($vmid, "query-block-jobs");
            my $ctime = time();

            my $running_jobs = {};
            for my $stat (@$stats) {
                next if $stat->{type} ne $op;
                $running_jobs->{$stat->{device}} = $stat;
            }

            my $readycounter = 0;

            for my $job_id (sort keys %$jobs) {
                my $job = $running_jobs->{$job_id};

                my $vanished = !defined($job);
                my $complete = defined($jobs->{$job_id}->{complete}) && $vanished;
                if ($complete || ($vanished && $completion eq 'auto')) {
                    print "$job_id: $op-job finished\n";
                    delete $jobs->{$job_id};
                    next;
                }

                # a job we did not finish ourselves must not disappear
                die "$job_id: '$op' has been cancelled\n" if !defined($job);

                my $busy = $job->{busy};
                my $ready = $job->{ready};
                if (my $total = $job->{len}) {
                    my $transferred = $job->{offset} || 0;
                    my $remaining = $total - $transferred;
                    my $percent = sprintf "%.2f", ($transferred * 100 / $total);

                    my $duration = $ctime - $starttime;
                    my $total_h = render_bytes($total, 1);
                    my $transferred_h = render_bytes($transferred, 1);

                    my $status = sprintf(
                        "transferred $transferred_h of $total_h ($percent%%) in %s",
                        render_duration($duration),
                    );

                    if ($ready) {
                        if ($busy) {
                            $status .= ", still busy"; # shouldn't even happen? but mirror is weird
                        } else {
                            $status .= ", ready";
                        }
                    }
                    # once a job was seen ready, its progress is not printed again
                    print "$job_id: $status\n" if !$jobs->{$job_id}->{ready};
                    $jobs->{$job_id}->{ready} = $ready;
                }

                $readycounter++ if $job->{ready};
            }

            last if scalar(keys %$jobs) == 0;

            if ($readycounter == scalar(keys %$jobs)) {
                print "all '$op' jobs are ready\n";

                # do the complete later (or has already been done)
                last if $completion eq 'skip' || $completion eq 'auto';

                if ($vmiddst && $vmiddst != $vmid) {
                    my $agent_running = $qga && qga_check_running($vmid);
                    if ($agent_running) {
                        print "freeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-freeze"); };
                        warn $@ if $@;
                    } else {
                        print "suspend vm\n";
                        eval { PVE::QemuServer::vm_suspend($vmid, 1); };
                        warn $@ if $@;
                    }

                    # if we clone a disk for a new target vm, we don't switch the disk
                    PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs);

                    if ($agent_running) {
                        print "unfreeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-thaw"); };
                        warn $@ if $@;
                    } else {
                        print "resume vm\n";
                        eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
                        warn $@ if $@;
                    }

                    last;
                } else {

                    for my $job_id (sort keys %$jobs) {
                        # try to switch the disk if source and destination are on the same guest
                        print "$job_id: Completing block job_id...\n";

                        # named differently from $op so the job type stays visible in here
                        my $qmp_command;
                        if ($completion eq 'complete') {
                            $qmp_command = 'block-job-complete';
                        } elsif ($completion eq 'cancel') {
                            $qmp_command = 'block-job-cancel';
                        } else {
                            die "invalid completion value: $completion\n";
                        }

                        eval { mon_cmd($vmid, $qmp_command, device => $job_id) };
                        my $complete_err = $@;
                        if ($complete_err && $complete_err =~ m/cannot be completed/) {
                            print "$job_id: block job cannot be completed, trying again.\n";
                            $err_complete++;
                        } elsif ($complete_err) {
                            # any other failure used to be reported as success and left the
                            # loop retrying without ever hitting the retry limit
                            die "$job_id: block job cannot be completed - $complete_err\n";
                        } else {
                            print "$job_id: Completed successfully.\n";
                            $jobs->{$job_id}->{complete} = 1;
                        }
                    }
                }
            }
            sleep 1;
        }
    };
    my $err = $@;

    if ($err) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        die "block job ($op) error: $err";
    }
}
7507
# Send 'block-job-cancel' for every job in $jobs (keyed by device name) and then poll
# 'query-block-jobs' once per second until none of them is reported any more.
# Finished jobs are removed from $jobs, so the hash is empty on return.
sub qemu_blockjobs_cancel {
    my ($vmid, $jobs) = @_;

    for my $job_id (keys %$jobs) {
        print "$job_id: Cancelling block job\n";
        # errors of the cancel request itself are ignored on purpose
        eval { mon_cmd($vmid, "block-job-cancel", device => $job_id); };
        $jobs->{$job_id}->{cancel} = 1;
    }

    while (1) {
        my $stats = mon_cmd($vmid, "query-block-jobs");

        my %still_running;
        for my $stat (@$stats) {
            $still_running{$stat->{device}} = $stat;
        }

        for my $job_id (keys %$jobs) {
            next if !defined($jobs->{$job_id}->{cancel});
            next if defined($still_running{$job_id});

            print "$job_id: Done.\n";
            delete $jobs->{$job_id};
        }

        last if scalar(keys %$jobs) == 0;

        sleep 1;
    }
}
7538
# Clone one drive of a VM, either as linked clone or as full copy.
#
# Parameters (positional):
#   $storecfg   - storage configuration
#   $vmid       - ID of the source VM
#   $running    - true if the source VM is running (a full clone then uses drive-mirror)
#   $drivename  - config key of the drive, e.g. 'efidisk0' or 'tpmstate0'
#   $drive      - parsed drive hash; $drive->{file} is the source volume
#   $snapname   - optional snapshot to clone from
#   $newvmid    - ID of the VM the new volume is allocated for
#   $storage    - optional target storage, defaults to the storage of the source volume
#   $format     - optional target format, resolved via resolve_dst_disk_format()
#   $full       - false for a linked clone, true for a full clone
#   $newvollist - array ref, the new volume ID is pushed onto it
#   $jobs, $completion, $qga, $bwlimit - forwarded to the drive-mirror helpers
#   $conf       - VM config, used to determine the size of an EFI disk
#
# Returns a deep copy of $drive pointing to the new volume, without 'format' and with 'size'
# updated if the size of the new volume could be determined.
sub clone_disk {
    my ($storecfg, $vmid, $running, $drivename, $drive, $snapname,
        $newvmid, $storage, $format, $full, $newvollist, $jobs, $completion, $qga, $bwlimit, $conf) = @_;

    my $newvolid;

    if (!$full) {
        print "create linked clone of drive $drivename ($drive->{file})\n";
        $newvolid = PVE::Storage::vdisk_clone($storecfg, $drive->{file}, $newvmid, $snapname);
        push @$newvollist, $newvolid;
    } else {

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        $storeid = $storage if $storage;

        my $dst_format = resolve_dst_disk_format($storecfg, $storeid, $volname, $format);

        print "create full clone of drive $drivename ($drive->{file})\n";
        my $name = undef;
        my $size = undef;
        my $is_cloudinit = drive_is_cloudinit($drive);
        if ($is_cloudinit) {
            $name = "vm-$newvmid-cloudinit";
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            if ($scfg->{path}) {
                $name .= ".$dst_format";
            }
            $snapname = undef;
            $size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE;
        } elsif ($drivename eq 'efidisk0') {
            $size = get_efivars_size($conf);
        } elsif ($drivename eq 'tpmstate0') {
            $size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
        } else {
            ($size) = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
        }
        $newvolid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $newvmid, $dst_format, $name, ($size/1024)
        );
        push @$newvollist, $newvolid;

        PVE::Storage::activate_volumes($storecfg, [$newvolid]);

        if ($is_cloudinit) {
            # no data is copied for a cloudinit drive, only the new volume is allocated

            # when cloning multiple disks (e.g. during clone_vm) it might be the last disk
            # if this is the case, we have to complete any block-jobs still there from
            # previous drive-mirrors
            # $completion is optional, so guard the comparison against undef
            my $complete_jobs = defined($completion) && $completion eq 'complete';
            if ($complete_jobs && (scalar(keys %$jobs) > 0)) {
                qemu_drive_mirror_monitor($vmid, $newvmid, $jobs, $completion, $qga);
            }
        } else {
            my $sparseinit = PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $newvolid);
            if (!$running || $snapname) {
                # TODO: handle bwlimits
                if ($drivename eq 'efidisk0') {
                    # the relevant data on the efidisk may be smaller than the source
                    # e.g. on RBD/ZFS, so we use dd to copy only the amount
                    # that is given by the OVMF_VARS.fd
                    my $src_path = PVE::Storage::path($storecfg, $drive->{file});
                    my $dst_path = PVE::Storage::path($storecfg, $newvolid);

                    # better for Ceph if block size is not too small, see bug #3324
                    my $bs = 1024*1024;

                    run_command(['qemu-img', 'dd', '-n', '-O', $dst_format, "bs=$bs", "osize=$size",
                        "if=$src_path", "of=$dst_path"]);
                } else {
                    qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit);
                }
            } else {

                die "cannot move TPM state while VM is running\n" if $drivename eq 'tpmstate0';

                my $kvmver = get_running_qemu_version($vmid);
                if (!min_version($kvmver, 2, 7)) {
                    die "drive-mirror with iothread requires qemu version 2.7 or higher\n"
                        if $drive->{iothread};
                }

                qemu_drive_mirror($vmid, $drivename, $newvolid, $newvmid, $sparseinit, $jobs,
                    $completion, $qga, $bwlimit);
            }
        }
    }

    # best effort: a failing size query only means that 'size' is not updated below
    my ($size) = eval { PVE::Storage::volume_size_info($storecfg, $newvolid, 10) };

    my $disk = dclone($drive);
    delete $disk->{format};
    $disk->{file} = $newvolid;
    $disk->{size} = $size if defined($size);

    return $disk;
}
7634
# Ask the running QEMU process of $vmid for its version ('query-version') and return it as
# "<major>.<minor>".
sub get_running_qemu_version {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, "query-version");
    my ($major, $minor) = ($res->{qemu}->{major}, $res->{qemu}->{minor});

    return "$major.$minor";
}
7640
# Decide whether the old (non-efi) pxe bios files have to be used for $machine_type.
#
# Returns nothing if no machine type is given, otherwise the list ($use_old_bios_files,
# $machine_type). A machine type ending in '.pxe' always selects the old files and is returned
# without that suffix.
sub qemu_use_old_bios_files {
    my ($machine_type) = @_;

    return if !$machine_type;

    if ($machine_type =~ m/^(\S+)\.pxe$/) {
        my $base_type = $1;
        return (1, $base_type);
    }

    # Note: kvm version < 2.4 use non-efi pxe files, and have problems when we
    # load new efi bios files on migration. So this hack is required to allow
    # live migration from qemu-2.2 to qemu-2.4, which is sometimes used when
    # upgrading from proxmox-ve-3.X to proxmox-ve 4.0
    my $version = extract_version($machine_type, kvm_user_version());
    my $use_old_bios_files = !min_version($version, 2, 4);

    return ($use_old_bios_files, $machine_type);
}
7662
# Return the size in bytes of the OVMF vars image that get_ovmf_files() selects for $conf.
# Dies if that image is not a plain file.
sub get_efivars_size {
    my ($conf) = @_;

    my $arch = get_vm_arch($conf);

    my $efidisk;
    $efidisk = parse_drive('efidisk0', $conf->{efidisk0}) if $conf->{efidisk0};

    my $smm = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    my $ovmf_vars = (get_ovmf_files($arch, $efidisk, $smm))[1];
    die "uefi vars image '$ovmf_vars' not found\n" if ! -f $ovmf_vars;

    return -s $ovmf_vars;
}
7672
# Set the 'size' of the efidisk0 entry of $conf to the value of get_efivars_size().
# Does nothing if $conf has no efidisk0; always returns nothing.
sub update_efidisk_size {
    my ($conf) = @_;

    if (defined($conf->{efidisk0})) {
        my $efidisk = PVE::QemuServer::parse_drive('efidisk0', $conf->{efidisk0});
        $efidisk->{size} = get_efivars_size($conf);
        $conf->{efidisk0} = print_drive($efidisk);
    }

    return;
}
7684
# Set the 'size' of the tpmstate0 entry of $conf to TPMSTATE_DISK_SIZE.
# Does nothing if $conf has no tpmstate0, like update_efidisk_size() does for efidisk0.
sub update_tpmstate_size {
    my ($conf) = @_;

    # without this guard a missing entry would be parsed as undef and a bogus
    # 'tpmstate0' line could end up in the config
    return if !defined($conf->{tpmstate0});

    my $disk = PVE::QemuServer::parse_drive('tpmstate0', $conf->{tpmstate0});
    $disk->{size} = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE();
    $conf->{tpmstate0} = print_drive($disk);
}
7692
# Allocate a new EFI vars volume on $storeid and fill it with the OVMF vars image selected by
# get_ovmf_files($arch, $efidisk, $smm).
#
# Returns the list ($volid, $size_in_kb) where the size is the one reported for the new volume
# divided by 1024. Dies if the vars image is not a plain file.
sub create_efidisk($$$$$$$) {
    my ($storecfg, $storeid, $vmid, $fmt, $arch, $efidisk, $smm) = @_;

    my $ovmf_vars = (get_ovmf_files($arch, $efidisk, $smm))[1];
    die "EFI vars default image not found\n" if ! -f $ovmf_vars;

    my $image_bytes = -s $ovmf_vars;
    my $image_kb = PVE::Tools::convert_size($image_bytes, 'b' => 'kb');

    my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $fmt, undef, $image_kb);
    PVE::Storage::activate_volumes($storecfg, [$volid]);

    qemu_img_convert($ovmf_vars, $volid, $image_bytes, undef, 0);

    my ($volume_bytes) = PVE::Storage::volume_size_info($storecfg, $volid, 3);

    return ($volid, $volume_bytes/1024);
}
7709
# Query the iothreads of VM $vmid ('query-iothreads') and return a hash ref that maps each
# iothread 'id' to its 'thread-id'.
sub vm_iothreads_list {
    my ($vmid) = @_;

    my $reply = mon_cmd($vmid, 'query-iothreads');

    my %thread_id_of;
    for my $entry (@$reply) {
        $thread_id_of{ $entry->{id} } = $entry->{"thread-id"};
    }

    return \%thread_id_of;
}
7722
# Compute the SCSI controller layout for $drive.
#
# Returns the list ($maxdev, $controller, $controller_prefix):
#   $maxdev            - devices per controller: 7 without 'scsihw' or for lsi*, 1 for
#                        'virtio-scsi-single', 256 otherwise
#   $controller        - int($drive->{index} / $maxdev)
#   $controller_prefix - "virtioscsi" for 'virtio-scsi-single', "scsihw" otherwise
sub scsihw_infos {
    my ($conf, $drive) = @_;

    my $scsihw = $conf->{scsihw};
    my $is_single = $scsihw && $scsihw eq 'virtio-scsi-single';

    my $maxdev;
    if (!$scsihw || $scsihw =~ m/^lsi/) {
        $maxdev = 7;
    } else {
        $maxdev = $is_single ? 1 : 256;
    }

    my $controller = int($drive->{index} / $maxdev);
    my $controller_prefix = $is_single ? "virtioscsi" : "scsihw";

    return ($maxdev, $controller, $controller_prefix);
}
7743
# Map an 'ostype' value to a Windows version number.
# wxp, w2k3 and w2k yield 5, w2k8 and wvista yield 6, 'win<N>' yields <N>; everything else
# (including an empty or missing ostype) yields 0.
sub windows_version {
    my ($ostype) = @_;

    return 0 if !$ostype;

    my %version_of = (
        wxp => 5,
        w2k3 => 5,
        w2k => 5,
        w2k8 => 6,
        wvista => 6,
    );
    return $version_of{$ostype} if exists $version_of{$ostype};

    return $1 if $ostype =~ m/^win(\d+)$/;

    return 0;
}
7761
# Pick the image format for a new volume on storage $storeid.
#
# The requested $format wins if the storage supports it. Without a requested format the format of
# $src_volname (as determined by qemu_img_format()) is used as hint; without a source volume name
# the default format of the storage is returned. Unsupported formats fall back to that default.
sub resolve_dst_disk_format {
    my ($storecfg, $storeid, $src_volname, $format) = @_;
    my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);

    if (!$format) {
        return $defFormat if !$src_volname;

        # if no target format is specified, use the source disk format as hint
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        $format = qemu_img_format($scfg, $src_volname);
    }

    # test if requested format is supported - else use default
    my $is_supported = grep { $_ eq $format } @$validFormats;

    return $is_supported ? $format : $defFormat;
}
7781
# NOTE: if this logic changes, please update docs & possibly gui logic
#
# Select the storage for the VM state, in this order of preference:
#   1. 'vmstatestorage' from the config
#   2. a shared storage on which the VM has at least one disk
#   3. a non-shared storage on which the VM has at least one disk
#   4. the storage named 'local'
# Within 2. and 3. a storage with a 'path' is preferred.
sub find_vmstate_storage {
    my ($conf, $storecfg) = @_;

    return $conf->{vmstatestorage} if $conf->{vmstatestorage};

    my ($shared, $local);

    foreach_storage_used_by_vm($conf, sub {
        my ($sid) = @_;
        my $scfg = PVE::Storage::storage_config($storecfg, $sid);

        # prefer file based storage
        if ($scfg->{shared}) {
            $shared = $sid if !$shared || $scfg->{path};
        } else {
            $local = $sid if !$local || $scfg->{path};
        }
    });

    return $shared // $local // 'local';
}
7805
# Generate a new UUID with the UUID module and return its string form.
sub generate_uuid {
    my $binary_uuid;
    my $text_uuid;

    UUID::generate($binary_uuid);
    UUID::unparse($binary_uuid, $text_uuid);

    return $text_uuid;
}
7812
# Return a freshly generated UUID in the form "uuid=<uuid>".
sub generate_smbios1_uuid {
    my $uuid = generate_uuid();
    return "uuid=$uuid";
}
7816
# Send the 'nbd-server-stop' monitor command to VM $vmid.
sub nbd_stop {
    my ($vmid) = @_;

    return mon_cmd($vmid, 'nbd-server-stop');
}
7822
# Create the (empty) reboot trigger file "/run/qemu-server/$vmid.reboot".
# Dies if the file cannot be opened for writing.
sub create_reboot_request {
    my ($vmid) = @_;

    my $trigger_file = "/run/qemu-server/$vmid.reboot";

    open(my $fh, '>', $trigger_file)
        or die "failed to create reboot trigger file: $!\n";
    close($fh);
}
7829
# Remove the reboot trigger file "/run/qemu-server/$vmid.reboot".
# Returns the result of unlink(), i.e. a false value if there was no such file.
# Dies if the removal fails for any reason other than ENOENT.
sub clear_reboot_request {
    my ($vmid) = @_;

    my $removed = unlink("/run/qemu-server/$vmid.reboot");

    if (!$removed && $! != POSIX::ENOENT) {
        die "could not remove reboot request for $vmid: $!";
    }

    return $removed;
}
7841
# Build a boot order hash (device name => boot index) from the legacy boot string.
#
# The legacy string comes from $bootcfg->{legacy} or the schema default; its n-th letter gets the
# base index n*100. Letter 'd' is applied to CD-ROM drives, 'c' to the drive named by
# $conf->{bootdisk} and 'n' to the configured net devices; every device handled for a letter
# bumps that letter's index by one.
sub bootorder_from_legacy {
    my ($conf, $bootcfg) = @_;

    my $boot = $bootcfg->{legacy} || $boot_fmt->{legacy}->{default};

    my %next_index;
    my $position = 1;
    for my $letter (split(//, $boot)) {
        $next_index{$letter} = $position * 100;
        $position++;
    }

    my %bootorder;

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        if (drive_is_cdrom($drive, 1)) {
            return if !$next_index{d};
            $bootorder{$ds} = $next_index{d};
            $next_index{d} += 1;
        } elsif ($next_index{c}) {
            # the index is consumed for every disk, but only the boot disk is recorded
            my $is_bootdisk = $conf->{bootdisk} && $conf->{bootdisk} eq $ds;
            $bootorder{$ds} = $next_index{c} if $is_bootdisk;
            $next_index{c} += 1;
        }
    });

    if ($next_index{n}) {
        for my $net_id (0 .. $MAX_NETS - 1) {
            my $netname = "net$net_id";
            next if !$conf->{$netname};
            $bootorder{$netname} = $next_index{n};
            $next_index{n} += 1;
        }
    }

    return \%bootorder;
}
7881
# Generate default device list for 'boot: order=' property. Matches legacy
# default boot order, but with explicit device names. This is important, since
# the fallback for when neither 'order' nor the old format is specified relies
# on 'bootorder_from_legacy' above, and it would be confusing if this diverges.
#
# Returns an array ref with (in this order, each only if present) the first hard disk, the first
# CD-ROM and the first configured net device.
sub get_default_bootdevices {
    my ($conf) = @_;

    my @devices;

    # 0 resolves the first harddisk, 1 the first cdrom
    for my $want_cdrom (0, 1) {
        my $first = PVE::QemuServer::Drive::resolve_first_disk($conf, $want_cdrom);
        push @devices, $first if $first;
    }

    # network
    my ($first_net) = grep { $conf->{$_} } map { "net$_" } (0 .. $MAX_NETS - 1);
    push @devices, $first_net if defined($first_net);

    return \@devices;
}
7909
# Return the boot order of $conf as hash ref (device name => boot index).
#
# Without a 'boot' property, or with one in legacy format, the result comes from
# bootorder_from_legacy(). With 'order=' the listed devices are numbered consecutively.
sub device_bootorder {
    my ($conf) = @_;

    return bootorder_from_legacy($conf) if !defined($conf->{boot});

    my $boot = parse_property_string($boot_fmt, $conf->{boot});

    return bootorder_from_legacy($conf, $boot) if !defined($boot) || $boot->{legacy};

    my $bootorder = {};
    if ($boot->{order}) {
        my $index = 100; # start at 100 to allow user to insert devices before us with -args
        for my $device (PVE::Tools::split_list($boot->{order})) {
            $bootorder->{$device} = $index;
            $index++;
        }
    }

    return $bootorder;
}
7929
# Connect to the qmeventd socket and send the vzdump handshake for $vmid.
#
# The connect is retried (25ms apart) while it fails with EINTR or EAGAIN; the fifth such failure
# and any other error are fatal. Returns the socket handle, which the caller has to close.
sub register_qmeventd_handle {
    my ($vmid) = @_;

    my $peer = "/var/run/qmeventd.sock";
    my $fh;
    my $attempts = 1;

    until ($fh = IO::Socket::UNIX->new(Peer => $peer, Blocking => 0, Timeout => 1)) {
        if ($! != EINTR && $! != EAGAIN) {
            die "unable to connect to qmeventd socket (vmid: $vmid) - $!\n";
        }
        if ($attempts > 4) {
            die "unable to connect to qmeventd socket (vmid: $vmid) - timeout "
                . "after $attempts retries\n";
        }
        usleep(25000);
        $attempts++;
    }

    # send handshake to mark VM as backing up
    print $fh to_json({vzdump => {vmid => "$vmid"}});

    # return handle to be closed later when inhibit is no longer required
    return $fh;
}
7957
7958# bash completion helper
7959
# Completion helper: return an array ref with the volume IDs of all backups whose format is
# 'vma.<compressor>'. If $cvalue starts with '<storeid>:', only that storage is listed.
sub complete_backup_archives {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    my ($storeid) = $cvalue =~ m/^([^:]+):/;

    my $data = PVE::Storage::template_list($cfg, $storeid, 'backup');

    my @volids;
    for my $id (keys %$data) {
        for my $item (@{$data->{$id}}) {
            next if $item->{format} !~ m/^vma\.(${\PVE::Storage::Plugin::COMPRESSOR_RE})$/;
            next if !defined($item->{volid});
            push @volids, $item->{volid};
        }
    }

    return \@volids;
}
7983
# Completion helper: return an array ref of VM IDs from vmstatus().
# With $running undefined all IDs are returned. Otherwise templates are left out and only VMs
# whose status is 'running' ($running true) or not 'running' ($running false) are kept.
my $complete_vmid_full = sub {
    my ($running) = @_;

    my $idlist = vmstatus();

    my $is_wanted = sub {
        my ($d) = @_;

        return 1 if !defined($running);
        return 0 if $d->{template};

        my $is_running = $d->{status} eq 'running';
        return $running ? $is_running : !$is_running;
    };

    my @vmids = grep { $is_wanted->($idlist->{$_}) } keys %$idlist;

    return \@vmids;
};
8003
# Completion helper: all VM IDs, regardless of their status.
sub complete_vmid {
    return $complete_vmid_full->();
}
8007
# Completion helper: IDs of all non-template VMs that are not running.
sub complete_vmid_stopped {
    return $complete_vmid_full->(0);
}
8011
# Completion helper: IDs of all non-template VMs that are running.
sub complete_vmid_running {
    return $complete_vmid_full->(1);
}
8015
# Completion helper: return an array ref with the IDs of all storages that pass
# storage_check_enabled() and have the 'images' content type configured.
sub complete_storage {
    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @storage_ids;
    for my $storage_id (keys %$ids) {
        my $enabled = PVE::Storage::storage_check_enabled($cfg, $storage_id, undef, 1);
        next if !$enabled;

        push @storage_ids, $storage_id if $ids->{$storage_id}->{content}->{images};
    }

    return \@storage_ids;
}
8030
# Completion helper: like complete_storage(), but storage_check_enabled() is asked for the node
# given as second element of $all_args.
sub complete_migration_storage {
    my ($cmd, $param, $current_value, $all_args) = @_;

    my $targetnode = $all_args->[1];

    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @storage_ids;
    for my $storage_id (keys %$ids) {
        my $enabled = PVE::Storage::storage_check_enabled($cfg, $storage_id, $targetnode, 1);
        next if !$enabled;

        push @storage_ids, $storage_id if $ids->{$storage_id}->{content}->{images};
    }

    return \@storage_ids;
}
8048
# Tell whether VM $vmid is paused according to 'query-status'.
# A failure of the config check or of the monitor command is only reported with warn(); the
# result is false in that case.
sub vm_is_paused {
    my ($vmid) = @_;

    my $status_reply = eval {
        PVE::QemuConfig::assert_config_exists_on_node($vmid);
        mon_cmd($vmid, "query-status");
    };
    if ($@) {
        warn "$@\n";
    }

    return $status_reply if !$status_reply;
    return $status_reply->{status} eq "paused";
}
8058
# Verify that the storage of volume $vol has the content type of that volume enabled.
# Returns 1 on success and dies otherwise.
sub check_volume_storage_type {
    my ($storecfg, $vol) = @_;

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($vol);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
    my ($vtype) = PVE::Storage::parse_volname($storecfg, $vol);

    my $type_enabled = $scfg->{content}->{$vtype};
    if (!$type_enabled) {
        die "storage '$storeid' does not support content-type '$vtype'\n";
    }

    return 1;
}
8071
80721;