]> git.proxmox.com Git - qemu-server.git/blame_incremental - PVE/QemuServer.pm
d/control: bump versioned (build-)dependency of libpve-common-perl
[qemu-server.git] / PVE / QemuServer.pm
... / ...
CommitLineData
1package PVE::QemuServer;
2
3use strict;
4use warnings;
5
6use Cwd 'abs_path';
7use Digest::SHA;
8use Fcntl ':flock';
9use Fcntl;
10use File::Basename;
11use File::Copy qw(copy);
12use File::Path;
13use File::stat;
14use Getopt::Long;
15use IO::Dir;
16use IO::File;
17use IO::Handle;
18use IO::Select;
19use IO::Socket::UNIX;
20use IPC::Open3;
21use JSON;
22use MIME::Base64;
23use POSIX;
24use Storable qw(dclone);
25use Time::HiRes qw(gettimeofday usleep);
26use URI::Escape;
27use UUID;
28
29use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file);
30use PVE::CGroup;
31use PVE::DataCenterConfig;
32use PVE::Exception qw(raise raise_param_exc);
33use PVE::Format qw(render_duration render_bytes);
34use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
35use PVE::INotify;
36use PVE::JSONSchema qw(get_standard_option parse_property_string);
37use PVE::ProcFSTools;
38use PVE::PBSClient;
39use PVE::RPCEnvironment;
40use PVE::Storage;
41use PVE::SysFSTools;
42use PVE::Systemd;
43use PVE::Tools qw(run_command file_read_firstline file_get_contents dir_glob_foreach get_host_arch $IPV6RE);
44
45use PVE::QMPClient;
46use PVE::QemuConfig;
47use PVE::QemuServer::Helpers qw(min_version config_aware_timeout);
48use PVE::QemuServer::Cloudinit;
49use PVE::QemuServer::CGroup;
50use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options);
51use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom drive_is_read_only parse_drive print_drive);
52use PVE::QemuServer::Machine;
53use PVE::QemuServer::Memory;
54use PVE::QemuServer::Monitor qw(mon_cmd);
55use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
56use PVE::QemuServer::USB qw(parse_usb_device);
57
# Optional SDN support: $have_sdn is 1 when PVE::Network::SDN::Zones could be
# loaded and stays undefined otherwise (a failing require is swallowed by eval).
my $have_sdn = eval {
    require PVE::Network::SDN::Zones;
    1;
};
63
# Directory containing the EDK2 firmware images.
my $EDK2_FW_BASE = '/usr/share/pve-edk2-firmware/';

# Firmware image pairs, keyed by guest architecture and then by variant name.
# Each entry lists two files: the *_CODE image first, the *_VARS image second.
my $OVMF = {
    x86_64 => {
        '4m-no-smm' => [
            $EDK2_FW_BASE . '/OVMF_CODE_4M.fd',
            $EDK2_FW_BASE . '/OVMF_VARS_4M.fd',
        ],
        '4m-no-smm-ms' => [
            $EDK2_FW_BASE . '/OVMF_CODE_4M.fd',
            $EDK2_FW_BASE . '/OVMF_VARS_4M.ms.fd',
        ],
        '4m' => [
            $EDK2_FW_BASE . '/OVMF_CODE_4M.secboot.fd',
            $EDK2_FW_BASE . '/OVMF_VARS_4M.fd',
        ],
        '4m-ms' => [
            $EDK2_FW_BASE . '/OVMF_CODE_4M.secboot.fd',
            $EDK2_FW_BASE . '/OVMF_VARS_4M.ms.fd',
        ],
        default => [
            $EDK2_FW_BASE . '/OVMF_CODE.fd',
            $EDK2_FW_BASE . '/OVMF_VARS.fd',
        ],
    },
    aarch64 => {
        default => [
            $EDK2_FW_BASE . '/AAVMF_CODE.fd',
            $EDK2_FW_BASE . '/AAVMF_VARS.fd',
        ],
    },
};
95
# Host CPU information, read once while the module is loaded.
my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();

# Note about locking: we use flock on the config file to protect against concurrent actions.
# Additionally, we have a 'lock' setting in the config file. This can be set to 'migrate',
# 'backup', 'snapshot' or 'rollback'. Most actions are not allowed when such a lock is set.
# But you can ignore this kind of lock with the --skiplock flag.

# Register the parser and writer for files below '/qemu-server/' with the cluster file system.
cfs_register_file('/qemu-server/', \&parse_vm_config, \&write_vm_config);
106
# Standard option 'pve-qm-stateuri': location some commands save/restore state from.
PVE::JSONSchema::register_standard_option('pve-qm-stateuri', {
    type => 'string',
    maxLength => 128,
    optional => 1,
    description => "Some command save/restore state from this location.",
});

# Standard option 'pve-qemu-machine': machine type string restricted by the pattern below.
PVE::JSONSchema::register_standard_option('pve-qemu-machine', {
    type => 'string',
    pattern => '(pc|pc(-i440fx)?-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|q35|pc-q35-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|virt(?:-\d+(\.\d+)+)?(\+pve\d+)?)',
    maxLength => 40,
    optional => 1,
    description => "Specifies the Qemu machine type.",
});

# Standard option 'pve-targetstorage': source-to-target storage mapping list.
PVE::JSONSchema::register_standard_option('pve-targetstorage', {
    type => 'string',
    format => 'storage-pair-list',
    optional => 1,
    description => "Mapping from source to target storages. Providing only a single storage ID"
        ." maps all source storages to that storage. Providing the special value '1' will map"
        ." each source storage to itself.",
});
128
129#no warnings 'redefine';
130
# Cache for nodename(); filled on the first call.
my $nodename_cache;

# Return the node name reported by PVE::INotify::nodename(). The lookup is done
# only while no defined value has been cached yet; later calls reuse the cache.
sub nodename {
    if (!defined($nodename_cache)) {
        $nodename_cache = PVE::INotify::nodename();
    }
    return $nodename_cache;
}
136
# Property-string format of the 'watchdog' option, registered as 'pve-qm-watchdog'.
my $watchdog_fmt = {
    model => {
        type => 'string',
        enum => ['i6300esb', 'ib700'],
        default => 'i6300esb',
        default_key => 1,
        optional => 1,
        description => "Watchdog type to emulate.",
    },
    action => {
        type => 'string',
        enum => ['reset', 'shutdown', 'poweroff', 'pause', 'debug', 'none'],
        optional => 1,
        description => "The action to perform if after activation the guest fails to poll the"
            ." watchdog in time.",
    },
};
PVE::JSONSchema::register_format('pve-qm-watchdog', $watchdog_fmt);
154
# Property-string format of the 'agent' option (QEMU guest agent settings).
my $agent_fmt = {
    enabled => {
        type => 'boolean',
        default => 0,
        default_key => 1,
        description => "Enable/disable communication with a Qemu Guest Agent (QGA) running in"
            ." the VM.",
    },
    fstrim_cloned_disks => {
        type => 'boolean',
        default => 0,
        optional => 1,
        description => "Run fstrim after moving a disk or migrating the VM.",
    },
    type => {
        type => 'string',
        enum => ['virtio', 'isa'],
        default => 'virtio',
        optional => 1,
        description => "Select the agent type",
    },
};
176
# Property-string format of the 'vga' option.
my $vga_fmt = {
    type => {
        type => 'string',
        enum => [
            'cirrus', 'qxl', 'qxl2', 'qxl3', 'qxl4', 'none',
            'serial0', 'serial1', 'serial2', 'serial3',
            'std', 'virtio', 'vmware',
        ],
        default => 'std',
        default_key => 1,
        optional => 1,
        description => "Select the VGA type.",
    },
    memory => {
        type => 'integer',
        minimum => 4,
        maximum => 512,
        optional => 1,
        description => "Sets the VGA memory (in MiB). Has no effect with serial display.",
    },
};
194
# Property-string format of the 'ivshmem' option (inter-VM shared memory).
my $ivshmem_fmt = {
    size => {
        type => 'integer',
        minimum => 1,
        description => "The size of the file in MB.",
    },
    name => {
        type => 'string',
        pattern => '[a-zA-Z0-9\-]+',
        format_description => 'string',
        optional => 1,
        description => "The name of the file. Will be prefixed with 'pve-shm-'. Default is the"
            ." VMID. Will be deleted when the VM is stopped.",
    },
};
209
# Property-string format of the 'audio0' option.
my $audio_fmt = {
    device => {
        type => 'string',
        enum => ['ich9-intel-hda', 'intel-hda', 'AC97'],
        description => "Configure an audio device.",
    },
    driver => {
        type => 'string',
        enum => [qw(spice none)],
        default => 'spice',
        optional => 1,
        description => "Driver backend for the audio device.",
    },
};
224
# Property-string format of the 'spice_enhancements' option.
my $spice_enhancements_fmt = {
    foldersharing => {
        type => 'boolean',
        default => '0',
        optional => 1,
        description => "Enable folder sharing via SPICE. Needs Spice-WebDAV daemon installed in"
            ." the VM.",
    },
    videostreaming => {
        type => 'string',
        enum => [qw(off all filter)],
        default => 'off',
        optional => 1,
        description => "Enable video streaming. Uses compression for detected video streams.",
    },
};
240
# Property-string format of the 'rng0' option (VirtIO random number generator).
my $rng_fmt = {
    source => {
        type => 'string',
        enum => [qw(/dev/urandom /dev/random /dev/hwrng)],
        default_key => 1,
        description => "The file on the host to gather entropy from. In most cases '/dev/urandom'"
            ." should be preferred over '/dev/random' to avoid entropy-starvation issues on the"
            ." host. Using urandom does *not* decrease security in any meaningful way, as it's"
            ." still seeded from real entropy, and the bytes provided will most likely be mixed"
            ." with real entropy on the guest as well. '/dev/hwrng' can be used to pass through"
            ." a hardware RNG from the host.",
    },
    max_bytes => {
        type => 'integer',
        # The default is 1 KiB/s: enough entropy for the guest to avoid boot-starvation issues
        # (e.g. systemd etc...) while allowing no chance of overwhelming the host, provided
        # we're reading from /dev/urandom.
        default => 1024,
        optional => 1,
        description => "Maximum bytes of entropy allowed to get injected into the guest every"
            ." 'period' milliseconds. Prefer a lower value when using '/dev/random' as source. Use"
            ." `0` to disable limiting (potentially dangerous!).",
    },
    period => {
        type => 'integer',
        default => 1000,
        optional => 1,
        description => "Every 'period' milliseconds the entropy-injection quota is reset, allowing"
            ." the guest to retrieve another 'max_bytes' of entropy.",
    },
};
273
# Property-string format of the 'meta' option.
my $meta_info_fmt = {
    ctime => {
        type => 'integer',
        minimum => 0,
        optional => 1,
        description => "The guest creation timestamp as UNIX epoch time",
    },
    'creation-qemu' => {
        type => 'string',
        pattern => '\d+(\.\d+)+',
        optional => 1,
        description => "The QEMU (machine) version from the time this VM was created.",
    },
};
288
# Schema of the VM configuration options: one entry per config key, each a
# property description (type, constraints, default, description texts).
#
# Compared to the previous revision only description texts were corrected:
# missing blanks where two string parts are concatenated ('keyboard',
# 'startdate'), a doubled word ('keephugepages') and two misspellings
# ('balloon', 'ostype'). All types, constraints and defaults are unchanged.
my $confdesc = {
    onboot => {
        type => 'boolean',
        default => 0,
        optional => 1,
        description => "Specifies whether a VM will be started during system bootup.",
    },
    autostart => {
        type => 'boolean',
        default => 0,
        optional => 1,
        description => "Automatic restart after crash (currently ignored).",
    },
    hotplug => {
        type => 'string',
        format => 'pve-hotplug-features',
        default => 'network,disk,usb',
        optional => 1,
        description => "Selectively enable hotplug features. This is a comma separated list of"
            ." hotplug features: 'network', 'disk', 'cpu', 'memory' and 'usb'. Use '0' to disable"
            ." hotplug completely. Using '1' as value is an alias for the default `network,disk,usb`.",
    },
    reboot => {
        type => 'boolean',
        default => 1,
        optional => 1,
        description => "Allow reboot. If set to '0' the VM exit on reboot.",
    },
    lock => {
        type => 'string',
        enum => [qw(backup clone create migrate rollback snapshot snapshot-delete suspending suspended)],
        optional => 1,
        description => "Lock/unlock the VM.",
    },
    cpulimit => {
        type => 'number',
        minimum => 0,
        maximum => 128,
        default => 0,
        optional => 1,
        description => "Limit of CPU usage.",
        verbose_description => "Limit of CPU usage.\n\nNOTE: If the computer has 2 CPUs, it has"
            ." total of '2' CPU time. Value '0' indicates no CPU limit.",
    },
    cpuunits => {
        type => 'integer',
        minimum => 2,
        maximum => 262144,
        default => 'cgroup v1: 1024, cgroup v2: 100',
        optional => 1,
        description => "CPU weight for a VM, will be clamped to [1, 10000] in cgroup v2.",
        verbose_description => "CPU weight for a VM. Argument is used in the kernel fair scheduler."
            ." The larger the number is, the more CPU time this VM gets. Number is relative to"
            ." weights of all the other running VMs.",
    },
    memory => {
        type => 'integer',
        minimum => 16,
        default => 512,
        optional => 1,
        description => "Amount of RAM for the VM in MB. This is the maximum available memory when"
            ." you use the balloon device.",
    },
    balloon => {
        type => 'integer',
        minimum => 0,
        optional => 1,
        description => "Amount of target RAM for the VM in MB. Using zero disables the balloon driver.",
    },
    shares => {
        type => 'integer',
        minimum => 0,
        maximum => 50000,
        default => 1000,
        optional => 1,
        description => "Amount of memory shares for auto-ballooning. The larger the number is, the"
            ." more memory this VM gets. Number is relative to weights of all other running VMs."
            ." Using zero disables auto-ballooning. Auto-ballooning is done by pvestatd.",
    },
    keyboard => {
        type => 'string',
        enum => PVE::Tools::kvmkeymaplist(),
        default => undef,
        optional => 1,
        # second part starts with a blank so the sentence is not glued together
        description => "Keyboard layout for VNC server. The default is read from the"
            ." '/etc/pve/datacenter.cfg' configuration file. It should not be necessary to set it.",
    },
    name => {
        type => 'string',
        format => 'dns-name',
        optional => 1,
        description => "Set a name for the VM. Only used on the configuration web interface.",
    },
    scsihw => {
        type => 'string',
        enum => [qw(lsi lsi53c810 virtio-scsi-pci virtio-scsi-single megasas pvscsi)],
        default => 'lsi',
        optional => 1,
        description => "SCSI controller model",
    },
    description => {
        type => 'string',
        maxLength => 1024 * 8,
        optional => 1,
        description => "Description for the VM. Shown in the web-interface VM's summary."
            ." This is saved as comment inside the configuration file.",
    },
    ostype => {
        type => 'string',
        enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 win10 win11 l24 l26 solaris)],
        optional => 1,
        description => "Specify guest operating system.",
        verbose_description => <<EODESC,
Specify guest operating system. This is used to enable special
optimization/features for specific operating systems:

[horizontal]
other;; unspecified OS
wxp;; Microsoft Windows XP
w2k;; Microsoft Windows 2000
w2k3;; Microsoft Windows 2003
w2k8;; Microsoft Windows 2008
wvista;; Microsoft Windows Vista
win7;; Microsoft Windows 7
win8;; Microsoft Windows 8/2012/2012r2
win10;; Microsoft Windows 10/2016/2019
win11;; Microsoft Windows 11/2022
l24;; Linux 2.4 Kernel
l26;; Linux 2.6 - 5.X Kernel
solaris;; Solaris/OpenSolaris/OpenIndiana kernel
EODESC
    },
    boot => {
        type => 'string',
        format => 'pve-qm-boot',
        optional => 1,
        description => "Specify guest boot order. Use the 'order=' sub-property as usage with no"
            ." key or 'legacy=' is deprecated.",
    },
    bootdisk => {
        type => 'string',
        format => 'pve-qm-bootdisk',
        pattern => '(ide|sata|scsi|virtio)\d+',
        optional => 1,
        description => "Enable booting from specified disk. Deprecated: Use 'boot: order=foo;bar' instead.",
    },
    smp => {
        type => 'integer',
        minimum => 1,
        default => 1,
        optional => 1,
        description => "The number of CPUs. Please use option -sockets instead.",
    },
    sockets => {
        type => 'integer',
        minimum => 1,
        default => 1,
        optional => 1,
        description => "The number of CPU sockets.",
    },
    cores => {
        type => 'integer',
        minimum => 1,
        default => 1,
        optional => 1,
        description => "The number of cores per socket.",
    },
    numa => {
        type => 'boolean',
        default => 0,
        optional => 1,
        description => "Enable/disable NUMA.",
    },
    hugepages => {
        type => 'string',
        enum => [qw(any 2 1024)],
        optional => 1,
        description => "Enable/disable hugepages memory.",
    },
    keephugepages => {
        type => 'boolean',
        default => 0,
        optional => 1,
        description => "Use together with hugepages. If enabled, hugepages will not be deleted"
            ." after VM shutdown and can be used for subsequent starts.",
    },
    vcpus => {
        type => 'integer',
        minimum => 1,
        default => 0,
        optional => 1,
        description => "Number of hotplugged vcpus.",
    },
    acpi => {
        type => 'boolean',
        default => 1,
        optional => 1,
        description => "Enable/disable ACPI.",
    },
    agent => {
        type => 'string',
        format => $agent_fmt,
        optional => 1,
        description => "Enable/disable communication with the Qemu Guest Agent and its properties.",
    },
    kvm => {
        type => 'boolean',
        default => 1,
        optional => 1,
        description => "Enable/disable KVM hardware virtualization.",
    },
    tdf => {
        type => 'boolean',
        default => 0,
        optional => 1,
        description => "Enable/disable time drift fix.",
    },
    localtime => {
        type => 'boolean',
        optional => 1,
        description => "Set the real time clock (RTC) to local time. This is enabled by default if"
            ." the `ostype` indicates a Microsoft Windows OS.",
    },
    freeze => {
        type => 'boolean',
        optional => 1,
        description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
    },
    vga => {
        type => 'string',
        format => $vga_fmt,
        optional => 1,
        description => "Configure the VGA hardware.",
        verbose_description => "Configure the VGA Hardware. If you want to use high resolution"
            ." modes (>= 1280x1024x16) you may need to increase the vga memory option. Since QEMU"
            ." 2.9 the default VGA display type is 'std' for all OS types besides some Windows"
            ." versions (XP and older) which use 'cirrus'. The 'qxl' option enables the SPICE"
            ." display server. For win* OS you can select how many independent displays you want,"
            ." Linux guests can add displays them self.\nYou can also run without any graphic card,"
            ." using a serial device as terminal.",
    },
    watchdog => {
        type => 'string',
        format => 'pve-qm-watchdog',
        optional => 1,
        description => "Create a virtual hardware watchdog device.",
        verbose_description => "Create a virtual hardware watchdog device. Once enabled (by a guest"
            ." action), the watchdog must be periodically polled by an agent inside the guest or"
            ." else the watchdog will reset the guest (or execute the respective action specified)",
    },
    startdate => {
        type => 'string',
        typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
        pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
        default => 'now',
        optional => 1,
        # second part starts with a blank so the sentence is not glued together
        description => "Set the initial date of the real time clock. Valid format for date are:"
            ." 'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
    },
    startup => get_standard_option('pve-startup-order'),
    template => {
        type => 'boolean',
        default => 0,
        optional => 1,
        description => "Enable/disable Template.",
    },
    args => {
        type => 'string',
        optional => 1,
        description => "Arbitrary arguments passed to kvm.",
        verbose_description => <<EODESCR,
Arbitrary arguments passed to kvm, for example:

args: -no-reboot -no-hpet

NOTE: this option is for experts only.
EODESCR
    },
    tablet => {
        type => 'boolean',
        default => 1,
        optional => 1,
        description => "Enable/disable the USB tablet device.",
        verbose_description => "Enable/disable the USB tablet device. This device is usually needed"
            ." to allow absolute mouse positioning with VNC. Else the mouse runs out of sync with"
            ." normal VNC clients. If you're running lots of console-only guests on one host, you"
            ." may consider disabling this to save some context switches. This is turned off by"
            ." default if you use spice (`qm set <vmid> --vga qxl`).",
    },
    migrate_speed => {
        type => 'integer',
        minimum => 0,
        default => 0,
        optional => 1,
        description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
    },
    migrate_downtime => {
        type => 'number',
        minimum => 0,
        default => 0.1,
        optional => 1,
        description => "Set maximum tolerated downtime (in seconds) for migrations.",
    },
    cdrom => {
        type => 'string',
        format => 'pve-qm-ide',
        typetext => '<volume>',
        optional => 1,
        description => "This is an alias for option -ide2",
    },
    cpu => {
        type => 'string',
        format => 'pve-vm-cpu-conf',
        optional => 1,
        description => "Emulated CPU type.",
    },
    parent => get_standard_option('pve-snapshot-name', {
        optional => 1,
        description => "Parent snapshot name. This is used internally, and should not be modified.",
    }),
    snaptime => {
        type => 'integer',
        minimum => 0,
        optional => 1,
        description => "Timestamp for snapshots.",
    },
    vmstate => {
        type => 'string',
        format => 'pve-volume-id',
        optional => 1,
        description => "Reference to a volume which stores the VM state. This is used internally"
            ." for snapshots.",
    },
    vmstatestorage => get_standard_option('pve-storage-id', {
        description => "Default storage for VM state volumes/files.",
        optional => 1,
    }),
    runningmachine => get_standard_option('pve-qemu-machine', {
        description => "Specifies the QEMU machine type of the running vm. This is used internally"
            ." for snapshots.",
    }),
    runningcpu => {
        type => 'string',
        pattern => $PVE::QemuServer::CPUConfig::qemu_cmdline_cpu_re,
        format_description => 'QEMU -cpu parameter',
        optional => 1,
        description => "Specifies the QEMU '-cpu' parameter of the running vm. This is used"
            ." internally for snapshots.",
    },
    machine => get_standard_option('pve-qemu-machine'),
    arch => {
        type => 'string',
        enum => [qw(x86_64 aarch64)],
        optional => 1,
        description => "Virtual processor architecture. Defaults to the host.",
    },
    smbios1 => {
        type => 'string',
        format => 'pve-qm-smbios1',
        maxLength => 512,
        optional => 1,
        description => "Specify SMBIOS type 1 fields.",
    },
    protection => {
        type => 'boolean',
        default => 0,
        optional => 1,
        description => "Sets the protection flag of the VM. This will disable the remove VM and"
            ." remove disk operations.",
    },
    bios => {
        type => 'string',
        enum => [ qw(seabios ovmf) ],
        default => 'seabios',
        optional => 1,
        description => "Select BIOS implementation.",
    },
    vmgenid => {
        type => 'string',
        pattern => '(?:[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}|[01])',
        format_description => 'UUID',
        default => "1 (autogenerated)",
        optional => 1,
        description => "Set VM Generation ID. Use '1' to autogenerate on create or update, pass '0'"
            ." to disable explicitly.",
        verbose_description => "The VM generation ID (vmgenid) device exposes a 128-bit integer"
            ." value identifier to the guest OS. This allows to notify the guest operating system"
            ." when the virtual machine is executed with a different configuration (e.g. snapshot"
            ." execution or creation from a template). The guest operating system notices the"
            ." change, and is then able to react as appropriate by marking its copies of"
            ." distributed databases as dirty, re-initializing its random number generator, etc.\n"
            ."Note that auto-creation only works when done through API/CLI create or update methods"
            .", but not when manually editing the config file.",
    },
    hookscript => {
        type => 'string',
        format => 'pve-volume-id',
        optional => 1,
        description => "Script that will be executed during various steps in the vms lifetime.",
    },
    ivshmem => {
        type => 'string',
        format => $ivshmem_fmt,
        optional => 1,
        description => "Inter-VM shared memory. Useful for direct communication between VMs, or to"
            ." the host.",
    },
    audio0 => {
        type => 'string',
        format => $audio_fmt,
        optional => 1,
        description => "Configure a audio device, useful in combination with QXL/Spice.",
    },
    spice_enhancements => {
        type => 'string',
        format => $spice_enhancements_fmt,
        optional => 1,
        description => "Configure additional enhancements for SPICE.",
    },
    tags => {
        type => 'string',
        format => 'pve-tag-list',
        optional => 1,
        description => 'Tags of the VM. This is only meta information.',
    },
    rng0 => {
        type => 'string',
        format => $rng_fmt,
        optional => 1,
        description => "Configure a VirtIO-based Random Number Generator.",
    },
    meta => {
        type => 'string',
        format => $meta_info_fmt,
        optional => 1,
        description => "Some (read-only) meta-information about this guest.",
    },
};
717
# Property-string format of the 'cicustom' option: volumes holding custom
# cloud-init data, one optional key per data kind.
#
# The 'meta' description used to open with a single quote and "continue" with
# double-quoted parts, which embedded a stray '"', a line break and '."' in the
# text. It is now built from properly quoted parts like its siblings.
my $cicustom_fmt = {
    meta => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all meta data passed to the VM via'
            .' cloud-init. This is provider specific meaning configdrive2 and nocloud differ.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    network => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all network data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    user => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all user data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    vendor => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all vendor data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
};
# Make the cicustom property-string format available as 'pve-qm-cicustom'.
PVE::JSONSchema::register_format(
    'pve-qm-cicustom',
    $cicustom_fmt,
);
753
# Schema of the cloud-init related VM configuration options.
#
# Fixes compared to the previous revision:
#  - 'searchdomain' and 'nameserver' descriptions opened with a double quote but
#    were continued with single-quoted parts, so quote characters, line breaks
#    and dots ended up inside the description text;
#  - 'citype' was missing the closing parenthesis after `ostype`.
my $confdesc_cloudinit = {
    citype => {
        optional => 1,
        type => 'string',
        description => 'Specifies the cloud-init configuration format. The default depends on the'
            .' configured operating system type (`ostype`). We use the `nocloud` format for Linux,'
            .' and `configdrive2` for windows.',
        enum => ['configdrive2', 'nocloud', 'opennebula'],
    },
    ciuser => {
        optional => 1,
        type => 'string',
        description => "cloud-init: User name to change ssh keys and password for instead of the"
            ." image's configured default user.",
    },
    cipassword => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Password to assign the user. Using this is generally not'
            .' recommended. Use ssh keys instead. Also note that older cloud-init versions do not'
            .' support hashed passwords.',
    },
    cicustom => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Specify custom files to replace the automatically generated'
            .' ones at start.',
        format => 'pve-qm-cicustom',
    },
    searchdomain => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Sets DNS search domains for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    nameserver => {
        optional => 1,
        type => 'string', format => 'address-list',
        description => 'cloud-init: Sets DNS server IP address for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    sshkeys => {
        optional => 1,
        type => 'string',
        format => 'urlencoded',
        description => "cloud-init: Setup public SSH keys (one key per line, OpenSSH format).",
    },
};
804
805# what about other qemu settings ?
806#cpu => 'string',
807#machine => 'string',
808#fda => 'file',
809#fdb => 'file',
810#mtdblock => 'file',
811#sd => 'file',
812#pflash => 'file',
813#snapshot => 'bool',
814#bootp => 'file',
815##tftp => 'dir',
816##smb => 'dir',
817#kernel => 'file',
818#append => 'string',
819#initrd => 'file',
820##soundhw => 'string',
821
# Expose every option defined in $confdesc so far as standard option 'pve-qm-<key>'.
for my $opt (keys %$confdesc) {
    PVE::JSONSchema::register_standard_option("pve-qm-$opt", $confdesc->{$opt});
}
825
# Upper bounds for the number of indexed devices/options of each kind.
my ($MAX_USB_DEVICES, $MAX_NETS, $MAX_SERIAL_PORTS, $MAX_PARALLEL_PORTS, $MAX_NUMA)
    = (5, 32, 4, 3, 8);
831
# Property-string format of the 'numaN' options, registered as 'pve-qm-numanode'.
my $numa_fmt = {
    cpus => {
        type => 'string',
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        format_description => 'id[-id];...',
        description => 'CPUs accessing this NUMA node.',
    },
    memory => {
        type => 'number',
        optional => 1,
        description => 'Amount of memory this NUMA node provides.',
    },
    hostnodes => {
        type => 'string',
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        format_description => 'id[-id];...',
        optional => 1,
        description => 'Host NUMA nodes to use.',
    },
    policy => {
        type => 'string',
        enum => ['preferred', 'bind', 'interleave'],
        optional => 1,
        description => 'NUMA allocation policy.',
    },
};
PVE::JSONSchema::register_format('pve-qm-numanode', $numa_fmt);

# Option description shared by all 'numaN' config keys.
my $numadesc = {
    type => 'string',
    format => $numa_fmt,
    optional => 1,
    description => 'NUMA topology.',
};
PVE::JSONSchema::register_standard_option('pve-qm-numanode', $numadesc);
865
# Add the options 'numa0' .. 'numa<MAX_NUMA-1>'; all of them share $numadesc.
$confdesc->{"numa$_"} = $numadesc for 0 .. $MAX_NUMA - 1;
869
# NIC models accepted for the 'model' property of a network device.
my $nic_model_list = [qw(
    e1000
    e1000-82540em
    e1000-82544gc
    e1000-82545em
    e1000e
    i82551
    i82557b
    i82559er
    ne2k_isa
    ne2k_pci
    pcnet
    rtl8139
    virtio
    vmxnet3
)];

# The same models as one blank-separated, sorted string.
my $nic_model_list_txt = join(' ', sort @{$nic_model_list});
887
# Description text of the 'bridge' property of a network device. The text
# contains nothing to interpolate, so a non-interpolating here-document is used.
my $net_fmt_bridge_descr = <<'__EOD__';
Bridge to attach the network device to. The Proxmox VE standard bridge
is called 'vmbr0'.

If you do not specify a bridge, we create a kvm user (NATed) network
device, which provides DHCP and DNS services. The following addresses
are used:

 10.0.2.2 Gateway
 10.0.2.3 DNS Server
 10.0.2.4 SMB Server

The DHCP server assign addresses to the guest starting from 10.0.2.15.
__EOD__
902
# Property-string format of the 'netN' options.
my $net_fmt = {
    macaddr => get_standard_option('mac-addr', {
        description => "MAC address. That address must be unique withing your network. This is"
            ." automatically generated if not specified.",
    }),
    model => {
        type => 'string',
        enum => $nic_model_list,
        default_key => 1,
        description => "Network Card Model. The 'virtio' model provides the best performance with"
            ." very low CPU overhead. If your guest does not support this driver, it is usually"
            ." best to use 'e1000'.",
    },
    # one alias entry per NIC model name, mapping '<model>=<value>' onto model/macaddr
    (map { ($_ => { keyAlias => 'model', alias => 'macaddr' }) } @{$nic_model_list}),
    bridge => {
        type => 'string',
        pattern => '[-_.\w\d]+',
        format_description => 'bridge',
        optional => 1,
        description => $net_fmt_bridge_descr,
    },
    queues => {
        type => 'integer',
        minimum => 0,
        maximum => 16,
        optional => 1,
        description => 'Number of packet queues to be used on the device.',
    },
    rate => {
        type => 'number',
        minimum => 0,
        optional => 1,
        description => "Rate limit in mbps (megabytes per second) as floating point number.",
    },
    tag => {
        type => 'integer',
        minimum => 1,
        maximum => 4094,
        optional => 1,
        description => 'VLAN tag to apply to packets on this interface.',
    },
    trunks => {
        type => 'string',
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        format_description => 'vlanid[;vlanid...]',
        optional => 1,
        description => 'VLAN trunks to pass through this interface.',
    },
    firewall => {
        type => 'boolean',
        optional => 1,
        description => 'Whether this interface should be protected by the firewall.',
    },
    link_down => {
        type => 'boolean',
        optional => 1,
        description => 'Whether this interface should be disconnected (like pulling the plug).',
    },
    mtu => {
        type => 'integer',
        minimum => 1,
        maximum => 65520,
        optional => 1,
        description => "Force MTU, for VirtIO only. Set to '1' to use the bridge MTU",
    },
};
966
# Option description shared by all 'netN' config keys; also registered as the
# standard option 'pve-qm-net'.
my $netdesc = {
    type => 'string',
    format => $net_fmt,
    optional => 1,
    description => "Specify network devices.",
};

PVE::JSONSchema::register_standard_option('pve-qm-net', $netdesc);
974
# Property-string format of the cloud-init 'ipconfigN' options, registered as
# 'pve-qm-ipconfig'. Each gateway key requires the address key of the same family.
my $ipconfig_fmt = {
    ip => {
        type => 'string',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        default => 'dhcp',
        optional => 1,
        description => 'IPv4 address in CIDR format.',
    },
    gw => {
        type => 'string',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        requires => 'ip',
        optional => 1,
        description => 'Default gateway for IPv4 traffic.',
    },
    ip6 => {
        type => 'string',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        default => 'dhcp',
        optional => 1,
        description => 'IPv6 address in CIDR format.',
    },
    gw6 => {
        type => 'string',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        requires => 'ip6',
        optional => 1,
        description => 'Default gateway for IPv6 traffic.',
    },
};
PVE::JSONSchema::register_format('pve-qm-ipconfig', $ipconfig_fmt);
1010my $ipconfigdesc = {
1011 optional => 1,
1012 type => 'string', format => 'pve-qm-ipconfig',
1013 description => <<'EODESCR',
1014cloud-init: Specify IP addresses and gateways for the corresponding interface.
1015
1016IP addresses use CIDR notation, gateways are optional but need an IP of the same type specified.
1017
1018The special string 'dhcp' can be used for IP addresses to use DHCP, in which case no explicit
1019gateway should be provided.
1020For IPv6 the special string 'auto' can be used to use stateless autoconfiguration. This requires
1021cloud-init 19.4 or newer.
1022
1023If cloud-init is enabled and neither an IPv4 nor an IPv6 address is specified, it defaults to using
1024dhcp on IPv4.
1025EODESCR
1026};
1027PVE::JSONSchema::register_standard_option("pve-qm-ipconfig", $netdesc);
1028
# Every possible NIC slot gets a netN option in the VM config schema and a
# matching ipconfigN option in the cloud-init schema.
for my $i (0 .. $MAX_NETS - 1) {
    $confdesc->{"net$i"} = $netdesc;
    $confdesc_cloudinit->{"ipconfig$i"} = $ipconfigdesc;
}

# All cloud-init options are valid VM config options as well (keys and values
# of an unmodified hash are returned in matching order).
@$confdesc{ keys %$confdesc_cloudinit } = values %$confdesc_cloudinit;
1037
PVE::JSONSchema::register_format('pve-volume-id-or-qm-path', \&verify_volume_id_or_qm_path);

# Format verifier: accepts the special values 'none' and 'cdrom', any absolute
# path, or anything that passes the 'pve-volume-id' format check.
#
# Returns the (checked) value. On failure it dies, or returns nothing when
# $noerr is set.
sub verify_volume_id_or_qm_path {
    my ($volid, $noerr) = @_;

    # special values and absolute paths are passed through verbatim
    return $volid if $volid eq 'none' || $volid eq 'cdrom' || $volid =~ m|^/|;

    # if its neither 'none' nor 'cdrom' nor a path, check if its a volume-id
    my $checked = eval { PVE::JSONSchema::check_format('pve-volume-id', $volid, '') };
    if (my $err = $@) {
        return if $noerr;
        die $err;
    }

    return $checked;
}
1054
# Property-string format of the usbN options. The description texts end up in
# the generated schema documentation, so their spelling matters to users.
my $usb_fmt = {
    host => {
        default_key => 1,
        type => 'string', format => 'pve-qm-usb-device',
        format_description => 'HOSTUSBDEVICE|spice',
        description => <<EODESCR,
The Host USB device or port or the value 'spice'. HOSTUSBDEVICE syntax is:

 'bus-port(.port)*' (decimal numbers) or
 'vendor_id:product_id' (hexadecimal numbers) or
 'spice'

You can use the 'lsusb -t' command to list existing usb devices.

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

The value 'spice' can be used to add a usb redirection device for spice.
EODESCR
    },
    usb3 => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether the given host option is a USB3 device or port.",
        default => 0,
    },
};

# Schema entry shared by all usbN options.
my $usbdesc = {
    optional => 1,
    type => 'string', format => $usb_fmt,
    description => "Configure an USB device (n is 0 to 4).",
};
PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc);
1089
# Schema entry shared by all serialN options: either a host device below /dev
# or the literal value 'socket'.
my $serialdesc = {
    type => 'string',
    pattern => '(/dev/.+|socket)',
    optional => 1,
    description => 'Create a serial device inside the VM (n is 0 to 3)',
    verbose_description => <<'EODESCR',
Create a serial device inside the VM (n is 0 to 3), and pass through a
host serial device (i.e. /dev/ttyS0), or create a unix socket on the
host side (use 'qm terminal' to open a terminal connection).

NOTE: If you pass through a host serial device, it is no longer possible to migrate such machines -
use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1106
# Schema entry shared by all parallelN options.
#
# The alternation is wrapped in a group so that it stays a single unit when a
# consumer adds anchors around the pattern: ungrouped, '^' would only bind to
# the parport alternative and '$' only to the usb/lp one, letting values such
# as '/dev/parport0/../sda' slip through.
# NOTE(review): this assumes the schema validator matches as m/^$pattern$/ -
# confirm against PVE::JSONSchema; the grouping is harmless either way.
my $paralleldesc= {
    optional => 1,
    type => 'string',
    pattern => '(?:/dev/parport\d+|/dev/usb/lp\d+)',
    description => "Map host parallel devices (n is 0 to 2).",
    verbose_description => <<EODESCR,
Map host parallel devices (n is 0 to 2).

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1121
# Register one schema entry per indexed device slot. All slots of a kind share
# the same description hash.
$confdesc->{"parallel$_"} = $paralleldesc for 0 .. $MAX_PARALLEL_PORTS - 1;

$confdesc->{"serial$_"} = $serialdesc for 0 .. $MAX_SERIAL_PORTS - 1;

$confdesc->{"hostpci$_"} = $PVE::QemuServer::PCI::hostpcidesc
    for 0 .. $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES - 1;

# Drive options come pre-keyed from the Drive module; copy them over as-is
# (keys and values of an unmodified hash are returned in matching order).
@$confdesc{ keys %{$PVE::QemuServer::Drive::drivedesc_hash} } =
    values %{$PVE::QemuServer::Drive::drivedesc_hash};

$confdesc->{"usb$_"} = $usbdesc for 0 .. $MAX_USB_DEVICES - 1;
1141
# Property-string format of the 'boot' option: either the deprecated legacy
# letter list (default key) or an explicit device order.
my $boot_fmt = {
    legacy => {
        type => 'string',
        pattern => '[acdn]{1,4}',
        format_description => '[acdn]{1,4}',
        default_key => 1,
        optional => 1,
        description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n)."
            ." Deprecated, use 'order=' instead.",

        # note: this is also the fallback if boot: is not given at all
        default => 'cdn',
    },
    order => {
        type => 'string',
        format => 'pve-qm-bootdev-list',
        format_description => 'device[;device...]',
        optional => 1,
        description => <<'EODESC',
The guest will attempt to boot from devices in the order they appear here.

Disks, optical drives and passed-through storage USB devices will be directly
booted from, NICs will load PXE, and PCIe devices will either behave like disks
(e.g. NVMe) or load an option ROM (e.g. RAID controller, hardware NIC).

Note that only devices in this list will be marked as bootable and thus loaded
by the guest firmware (BIOS/UEFI). If you require multiple disks for booting
(e.g. software-raid), you need to specify all of them here.

Overrides the deprecated 'legacy=[acdn]*' value when given.
EODESC
    },
};
PVE::JSONSchema::register_format('pve-qm-boot', $boot_fmt);
1176
PVE::JSONSchema::register_format('pve-qm-bootdev', \&verify_bootdev);

# Format verifier for a single boot device name.
#
# Accepts valid drive names except efidisk*/tpmstate*, plus netN, usbN and
# hostpciN names that exist in the config schema. Returns the name on success.
# On failure it dies, or returns nothing when $noerr is set.
sub verify_bootdev {
    my ($dev, $noerr) = @_;

    # efidisk/tpmstate are excluded even if they are valid drive names
    my $special = $dev =~ m/^efidisk/ || $dev =~ m/^tpmstate/;
    return $dev if PVE::QemuServer::Drive::is_valid_drivename($dev) && !$special;

    # non-drive devices: "<base><index>" that is a known config option
    for my $base (qw(net usb hostpci)) {
        return $dev if $dev =~ m/^$base\d+$/ && $confdesc->{$dev};
    }

    return if $noerr;
    die "invalid boot device '$dev'\n";
}
1198
# Serialize a list of boot devices (array ref) into a 'boot' property string
# using the 'order=' key. An empty list yields the empty string.
sub print_bootorder {
    my ($devs) = @_;

    return "" if scalar(@$devs) == 0;

    my $order = join(';', @$devs);
    return PVE::JSONSchema::print_property_string({ order => $order }, $boot_fmt);
}
1205
# Cached result of the version ioctl; 0 means "not queried yet".
my $kvm_api_version = 0;

# Query /dev/kvm for its API version via ioctl and cache a true result.
# Returns nothing if /dev/kvm cannot be opened.
sub kvm_version {
    if (!$kvm_api_version) {
        open(my $kvm_fh, '<', '/dev/kvm') or return;

        # 0xae00 => KVM_GET_API_VERSION
        $kvm_api_version = ioctl($kvm_fh, 0xae00, 0);
        close($kvm_fh);
    }

    return $kvm_api_version;
}
1219
# Per-binary caches: parsed version string and the binary's mtime at the time
# the version was determined.
my $kvm_user_version = {};
my $kvm_mtime = {};

# Return the version of a QEMU binary as reported by '<binary> --version'.
#
# $binary defaults to the command for the host architecture. The result is
# cached per binary and only re-queried when the binary's mtime changes.
# Returns 'unknown' if the version line could not be parsed; a failing
# command only produces a warning. Dies if the binary cannot be stat()ed.
sub kvm_user_version {
    my ($binary) = @_;

    $binary //= get_command_for_arch(get_host_arch()); # get the native arch by default

    # stat() yields undef for a missing/inaccessible binary; fail with a clear
    # message instead of an "undefined value" method-call error below
    my $st = stat($binary) or die "unable to stat QEMU binary '$binary' - $!\n";

    my $cachedmtime = $kvm_mtime->{$binary} // -1;
    return $kvm_user_version->{$binary} if $kvm_user_version->{$binary} &&
        $cachedmtime == $st->mtime;

    $kvm_user_version->{$binary} = 'unknown';
    $kvm_mtime->{$binary} = $st->mtime;

    my $code = sub {
        my $line = shift;
        if ($line =~ m/^QEMU( PC)? emulator version (\d+\.\d+(\.\d+)?)(\.\d+)?[,\s]/) {
            $kvm_user_version->{$binary} = $2;
        }
    };

    eval { run_command([$binary, '--version'], outfunc => $code); };
    warn $@ if $@;

    return $kvm_user_version->{$binary};
}
# File-private wrapper around PVE::QemuServer::Machine::extract_version that
# falls back to the installed QEMU version when no $version is passed.
my sub extract_version {
    my ($machine_type, $version) = @_;

    $version //= kvm_user_version();

    return PVE::QemuServer::Machine::extract_version($machine_type, $version);
}
1254
# True if /dev/vhost-net exists and is a character device.
sub kernel_has_vhost_net {
    my $vhost_dev = '/dev/vhost-net';
    return -c $vhost_dev;
}
1258
# True if $key is a known option in the VM config schema ($confdesc).
sub option_exists {
    my ($key) = @_;

    return defined($confdesc->{$key});
}
1263
# Cached result of get_cdrom_path().
my $cdrom_path;

# Return the first of /dev/cdrom, /dev/cdrom1, /dev/cdrom2 that is a symlink,
# caching a positive result. Returns nothing if none of them is a symlink.
sub get_cdrom_path {
    return $cdrom_path if $cdrom_path;

    for my $candidate (qw(/dev/cdrom /dev/cdrom1 /dev/cdrom2)) {
        return $cdrom_path = $candidate if -l $candidate;
    }

    # explicit "not found" instead of leaking the result of the last file test
    return;
}
1273
# Resolve the 'file' value of a CD-ROM drive to a path:
#   'cdrom' -> result of get_cdrom_path()
#   'none'  -> empty string
#   /...    -> the absolute path itself
#   other   -> treated as volume ID and resolved via PVE::Storage::path()
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';
    return '' if $cdrom eq 'none';
    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path($storecfg, $cdrom);
}
1287
# try to convert old style file names to volume IDs
#
# 'none', 'cdrom', /dev/... paths and anything already shaped like
# "storage:volume" are returned unchanged. Other names containing a '/' cannot
# be converted (returns nothing). Plain file names are mapped onto the 'local'
# storage: "local:iso/<file>" for cdrom media, "local:<vmid>/<file>" otherwise.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    my $needs_no_conversion = $file eq 'none' || $file eq 'cdrom'
        || $file =~ m|^/dev/.+| || $file =~ m/^([^:]+):(.+)$/;
    return $file if $needs_no_conversion;

    return if $file =~ m|/|;

    my $is_cdrom = $media && $media eq 'cdrom';
    return $is_cdrom ? "local:iso/$file" : "local:$vmid/$file";
}
1306
# Check that the storage content type $vtype matches the drive's media type:
# 'disk' expects 'images', 'cdrom' expects 'iso'. Nothing is checked when
# $media is not set. Raises a parameter exception keyed by $opt on mismatch
# and dies with "internal error" for any other media value.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my %expected_type_for = (
        disk => 'images',
        cdrom => 'iso',
    );
    my $etype = $expected_type_for{$media} // die "internal error";

    return if ($vtype eq $etype);

    raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
}
1325
# Normalize $drive->{file} in place: plain filesystem paths are converted to
# volume IDs (raising a parameter exception keyed by $opt if no storage
# matches), and the media type defaults to 'cdrom' for ISO volumes and for the
# special 'cdrom'/'none' values.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    # try to convert filesystem paths to volume IDs

    my $file = $drive->{file};
    my $is_plain_path = $file !~ m/^(cdrom|none)$/
        && $file !~ m|^/dev/.+|
        && $file !~ m/^([^:]+):(.+)$/
        && $file !~ m/^\d+$/;

    if ($is_plain_path) {
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $drive->{file});
        if (!$vtype) {
            raise_param_exc({ $opt => "unable to associate path '$drive->{file}' to any storage"});
        }
        if (!$drive->{media} && $vtype eq 'iso') {
            $drive->{media} = 'cdrom';
        }
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    $drive->{media} = 'cdrom' if !$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/;
}
1345
# Parse the 'hotplug' option into a hash ref { feature => 1, ... }.
#
# '0' disables everything (empty hash), '1' selects the schema default list.
# Dies on any feature other than network, disk, cpu, memory or usb.
sub parse_hotplug_features {
    my ($data) = @_;

    my $res = {};

    return $res if $data eq '0';

    if ($data eq '1') {
        $data = $confdesc->{hotplug}->{default};
    }

    foreach my $feature (PVE::Tools::split_list($data)) {
        my ($name) = $feature =~ m/^(network|disk|cpu|memory|usb)$/
            or die "invalid hotplug feature '$feature'\n";
        $res->{$name} = 1;
    }

    return $res;
}
1364
PVE::JSONSchema::register_format('pve-hotplug-features', \&pve_verify_hotplug_features);

# Format verifier for the 'hotplug' option.
#
# Returns $value if parse_hotplug_features() accepts it. Otherwise it dies
# with the parser's error message, or returns nothing when $noerr is set.
sub pve_verify_hotplug_features {
    my ($value, $noerr) = @_;

    # The parser dies on invalid input, so it has to be wrapped - otherwise
    # $noerr could never be honoured.
    my $parsed = eval { parse_hotplug_features($value) };
    my $err = $@;

    return $value if $parsed;

    return if $noerr;

    die $err if $err;
    die "unable to parse hotplug option\n";
}
1375
# Issue a SCSI command through the SG_IO ioctl on an open file handle and
# decode the 36 byte response.
#
# Returns a hash ref with the keys type, removable, vendor, product and
# revision. On any failure it dies, or returns nothing when $noerr is set.
sub scsi_inquiry {
    my($fh, $noerr) = @_;

    my $SG_IO = 0x2285;
    my $SG_GET_VERSION_NUM = 0x2282;

    my $versionbuf = "\x00" x 8;
    my $ret = ioctl($fh, $SG_GET_VERSION_NUM, $versionbuf);
    if (!$ret) {
        die "scsi ioctl SG_GET_VERSION_NUM failed - $!\n" if !$noerr;
        return;
    }
    my $version = unpack("I", $versionbuf);
    if ($version < 30000) {
        die "scsi generic interface too old\n" if !$noerr;
        return;
    }

    my $buf = "\x00" x 36;
    my $sensebuf = "\x00" x 8;
    # 6 byte command: opcode 0x12 (presumably INQUIRY, going by the function
    # name) and a length byte matching the 36 byte response buffer
    my $cmd = pack("C x3 C x1", 0x12, 36);

    # see /usr/include/scsi/sg.h
    my $sg_io_hdr_t = "i i C C s I P P P I I i P C C C C S S i I I";

    my $packet = pack(
        $sg_io_hdr_t, ord('S'), -3, length($cmd), length($sensebuf), 0, length($buf), $buf, $cmd, $sensebuf, 6000
    );

    $ret = ioctl($fh, $SG_IO, $packet);
    if (!$ret) {
        die "scsi ioctl SG_IO failed - $!\n" if !$noerr;
        return;
    }

    # fields 17 and 18 are the two 'S' entries of the header template
    # NOTE(review): presumably host_status/driver_status - confirm against sg.h
    my @res = unpack($sg_io_hdr_t, $packet);
    if ($res[17] || $res[18]) {
        die "scsi ioctl SG_IO status error - $!\n" if !$noerr;
        return;
    }

    my $res = {};
    $res->@{qw(type removable vendor product revision)} = unpack("C C x6 A8 A16 A4", $buf);

    # reduce to a boolean (bit 7 of the second byte) and to the low 5 type bits
    $res->{removable} = $res->{removable} & 128 ? 1 : 0;
    $res->{type} &= 0x1F;

    return $res;
}
1425
# Open $path read-write and run scsi_inquiry() on it without raising errors.
#
# Returns the inquiry result (hash ref), or nothing if the path cannot be
# opened or the inquiry fails.
sub path_is_scsi {
    my ($path) = @_;

    # pass the mode separately instead of gluing it onto the path, so that
    # special characters in $path cannot be interpreted as part of the mode
    my $fh = IO::File->new($path, '+<') || return;
    my $res = scsi_inquiry($fh, 1);
    close($fh);

    return $res;
}
1435
# Build the QEMU device string for the USB tablet. q35 machines and aarch64
# guests attach it to the 'ehci' bus, everything else to 'uhci'.
sub print_tabletdevice_full {
    my ($conf, $arch) = @_;

    # query the machine type once and actually use the result
    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # we use uhci for old VMs because tablet driver was buggy in older qemu
    my $usbbus = ($q35 || $arch eq 'aarch64') ? 'ehci' : 'uhci';

    return "usb-tablet,id=tablet,bus=$usbbus.0,port=1";
}
1451
# Build the QEMU device string for a USB keyboard. Only aarch64 guests get
# one; for every other architecture nothing is returned.
sub print_keyboarddevice_full {
    my ($conf, $arch) = @_;

    if ($arch eq 'aarch64') {
        return "usb-kbd,id=keyboard,bus=ehci.0,port=2";
    }

    return;
}
1459
# File-private helper: the drive's config key, i.e. interface name directly
# followed by the index (e.g. "scsi0").
my sub get_drive_id {
    my ($drive) = @_;

    return join('', $drive->{interface}, $drive->{index});
}
1464
# Build the QEMU '-device' string for a drive.
#
# Supported interfaces are virtio, scsi, ide and sata; 'usb' and unknown
# interfaces die. Optional bootindex and (URI-unescaped) serial number are
# appended for all interfaces.
sub print_drivedevice_full {
    my ($storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type) = @_;

    my $device = '';

    my $drive_id = get_drive_id($drive);
    if ($drive->{interface} eq 'virtio') {
        my $pciaddr = print_pci_addr($drive_id, $bridges, $arch, $machine_type);
        $device = "virtio-blk-pci,drive=drive-$drive_id,id=${drive_id}${pciaddr}";
        $device .= ",iothread=iothread-$drive_id" if $drive->{iothread};
    } elsif ($drive->{interface} eq 'scsi') {

        my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = 'hd';
        my $path = '';
        if (drive_is_cdrom($drive)) {
            $devicetype = 'cd';
        } else {
            if ($drive->{file} =~ m|^/|) {
                # absolute path: ask the device itself what it is
                $path = $drive->{file};
                if (my $info = path_is_scsi($path)) {
                    if ($info->{type} == 0 && $drive->{scsiblock}) {
                        $devicetype = 'block';
                    } elsif ($info->{type} == 1) { # tape
                        $devicetype = 'generic';
                    }
                }
            } else {
                $path = PVE::Storage::path($storecfg, $drive->{file});
            }

            # for compatibility only, we prefer scsi-hd (#2408, #2355, #2380)
            my $version = extract_version($machine_type, kvm_user_version());
            if ($path =~ m/^iscsi\:\/\// &&
                !min_version($version, 4, 1)) {
                $devicetype = 'generic';
            }
        }

        # no/lsi*/pvscsi controllers address drives by scsi-id, all other
        # controller types by LUN
        if (!$conf->{scsihw} || $conf->{scsihw} =~ m/^lsi/ || $conf->{scsihw} eq 'pvscsi') {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,scsi-id=$unit";
        } else {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,channel=0,scsi-id=0"
                .",lun=$drive->{index}";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($drive->{ssd} && ($devicetype eq 'block' || $devicetype eq 'hd')) {
            $device .= ",rotation_rate=1";
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};

    } elsif ($drive->{interface} eq 'ide' || $drive->{interface} eq 'sata') {
        my $maxdev = ($drive->{interface} eq 'sata') ? $PVE::QemuServer::Drive::MAX_SATA_DISKS : 2;
        my $controller = int($drive->{index} / $maxdev);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = ($drive->{media} && $drive->{media} eq 'cdrom') ? "cd" : "hd";

        $device = "ide-$devicetype";
        if ($drive->{interface} eq 'ide') {
            $device .= ",bus=ide.$controller,unit=$unit";
        } else {
            $device .= ",bus=ahci$controller.$unit";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($devicetype eq 'hd') {
            if (my $model = $drive->{model}) {
                $model = URI::Escape::uri_unescape($model);
                $device .= ",model=$model";
            }
            if ($drive->{ssd}) {
                $device .= ",rotation_rate=1";
            }
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};
    } elsif ($drive->{interface} eq 'usb') {
        die "implement me";
        # -device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0
    } else {
        die "unsupported interface type";
    }

    $device .= ",bootindex=$drive->{bootindex}" if $drive->{bootindex};

    if (my $serial = $drive->{serial}) {
        $serial = URI::Escape::uri_unescape($serial);
        $device .= ",serial=$serial";
    }

    return $device;
}
1560
# Read the iSCSI initiator name from /etc/iscsi/initiatorname.iscsi.
#
# Returns the value of the first 'InitiatorName = ...' line, undef if the file
# contains no such line, or nothing if the file cannot be opened.
sub get_initiator_name {
    my $fh = IO::File->new('/etc/iscsi/initiatorname.iscsi') || return;

    my $initiator;
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\s*InitiatorName\s*=\s*([\.\-:\w]+)/) {
            $initiator = $1;
            last;
        }
    }
    $fh->close();

    return $initiator;
}
1574
# Build the QEMU '-drive' option string for a drive.
#
# $pbs_name, if set, is used as the 'backing' node of an alloc-track format
# layer (not allowed for CD-ROM drives). $io_uring tells whether io_uring may
# be chosen as default aio mode when the drive does not configure one.
sub print_drive_commandline_full {
    my ($storecfg, $vmid, $drive, $pbs_name, $io_uring) = @_;

    my $path;
    my $volid = $drive->{file};
    my $format = $drive->{format};
    my $drive_id = get_drive_id($drive);

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
    my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;

    if (drive_is_cdrom($drive)) {
        $path = get_iso_path($storecfg, $vmid, $volid);
        die "$drive_id: cannot back cdrom drive with PBS snapshot\n" if $pbs_name;
    } else {
        if ($storeid) {
            $path = PVE::Storage::path($storecfg, $volid);
            $format //= qemu_img_format($scfg, $volname);
        } else {
            $path = $volid;
            $format //= "raw";
        }
    }

    # $path can be undefined (a 'cdrom' drive without any host CD-ROM link)
    my $is_rbd = defined($path) && $path =~ m/^rbd:/;

    my $opts = '';
    my @qemu_drive_options = qw(heads secs cyls trans media cache rerror werror aio discard);
    foreach my $o (@qemu_drive_options) {
        $opts .= ",$o=$drive->{$o}" if defined($drive->{$o});
    }

    # snapshot only accepts on|off
    if (defined($drive->{snapshot})) {
        my $v = $drive->{snapshot} ? 'on' : 'off';
        $opts .= ",snapshot=$v";
    }

    if (defined($drive->{ro})) { # ro maps to QEMUs `readonly`, which accepts `on` or `off` only
        $opts .= ",readonly=" . ($drive->{ro} ? 'on' : 'off');
    }

    # map our throttling options onto QEMU's; mbps values are converted to bytes
    foreach my $type (['', '-total'], [_rd => '-read'], [_wr => '-write']) {
        my ($dir, $qmpname) = @$type;
        if (my $v = $drive->{"mbps$dir"}) {
            $opts .= ",throttling.bps$qmpname=".int($v*1024*1024);
        }
        if (my $v = $drive->{"mbps${dir}_max"}) {
            $opts .= ",throttling.bps$qmpname-max=".int($v*1024*1024);
        }
        if (my $v = $drive->{"bps${dir}_max_length"}) {
            $opts .= ",throttling.bps$qmpname-max-length=$v";
        }
        if (my $v = $drive->{"iops${dir}"}) {
            $opts .= ",throttling.iops$qmpname=$v";
        }
        if (my $v = $drive->{"iops${dir}_max"}) {
            $opts .= ",throttling.iops$qmpname-max=$v";
        }
        if (my $v = $drive->{"iops${dir}_max_length"}) {
            $opts .= ",throttling.iops$qmpname-max-length=$v";
        }
    }

    if ($pbs_name) {
        $format = "rbd" if $is_rbd;
        die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n"
            if !$format;
        $opts .= ",format=alloc-track,file.driver=$format";
    } elsif ($format) {
        $opts .= ",format=$format";
    }

    my $cache_direct = 0;

    if (my $cache = $drive->{cache}) {
        $cache_direct = $cache =~ /^(?:off|none|directsync)$/;
    } elsif (!drive_is_cdrom($drive) && !($scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow})) {
        $opts .= ",cache=none";
        $cache_direct = 1;
    }

    # io_uring with cache mode writeback or writethrough on krbd will hang...
    my $rbd_no_io_uring = $scfg && $scfg->{type} eq 'rbd' && $scfg->{krbd} && !$cache_direct;

    # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
    # sometimes, just plain disable...
    my $lvm_no_io_uring = $scfg && $scfg->{type} eq 'lvm';

    if (!$drive->{aio}) {
        if ($io_uring && !$rbd_no_io_uring && !$lvm_no_io_uring) {
            # io_uring supports all cache modes
            $opts .= ",aio=io_uring";
        } else {
            # aio native works only with O_DIRECT
            if($cache_direct) {
                $opts .= ",aio=native";
            } else {
                $opts .= ",aio=threads";
            }
        }
    }

    if (!drive_is_cdrom($drive)) {
        my $detectzeroes;
        if (defined($drive->{detect_zeroes}) && !$drive->{detect_zeroes}) {
            $detectzeroes = 'off';
        } elsif ($drive->{discard}) {
            $detectzeroes = $drive->{discard} eq 'on' ? 'unmap' : 'on';
        } else {
            # This used to be our default with discard not being specified:
            $detectzeroes = 'on';
        }

        # note: 'detect-zeroes' works per blockdev and we want it to persist
        # after the alloc-track is removed, so put it on 'file' directly
        my $dz_param = $pbs_name ? "file.detect-zeroes" : "detect-zeroes";
        $opts .= ",$dz_param=$detectzeroes" if $detectzeroes;
    }

    if ($pbs_name) {
        $opts .= ",backing=$pbs_name";
        $opts .= ",auto-remove=on";
    }

    my $file_param = "file";
    if ($pbs_name) {
        # non-rbd drivers require the underlying file to be a seperate block
        # node, so add a second .file indirection
        $file_param .= ".file" if !$is_rbd;
        $file_param .= ".filename";
    }
    my $pathinfo = $path ? "$file_param=$path," : '';

    return "${pathinfo}if=none,id=drive-${drive_id}$opts";
}
1712
# Build the option string of a read-only 'pbs' block device node named
# $pbs_name from $pbs_conf (repository, snapshot, archive, optional keyfile).
sub print_pbs_blockdev {
    my ($pbs_conf, $pbs_name) = @_;

    my @props = (
        "driver=pbs",
        "node-name=$pbs_name",
        "read-only=on",
        "repository=$pbs_conf->{repository}",
        "snapshot=$pbs_conf->{snapshot}",
        "archive=$pbs_conf->{archive}",
    );
    push @props, "keyfile=$pbs_conf->{keyfile}" if $pbs_conf->{keyfile};

    return join(',', @props);
}
1722
# Build the QEMU '-device' string of a NIC.
#
# The 'virtio' model maps to the 'virtio-net-pci' device, every other model
# name is used as device name directly. Multiqueue, boot index, MTU (VirtIO on
# a bridge only) and, when $use_old_bios_files is set, the PXE ROM file are
# appended as needed.
sub print_netdevice_full {
    my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type) = @_;

    my $is_virtio = $net->{model} eq 'virtio';
    my $device = $is_virtio ? 'virtio-net-pci' : $net->{model};

    my $pciaddr = print_pci_addr("$netid", $bridges, $arch, $machine_type);
    my $tmpstr = "$device,mac=$net->{macaddr},netdev=$netid$pciaddr,id=$netid";

    if ($net->{queues} && $net->{queues} > 1 && $is_virtio) {
        # Consider we have N queues, the number of vectors needed is 2 * N + 2, i.e., one per in
        # and out of each queue plus one config interrupt and control vector queue
        my $vectors = $net->{queues} * 2 + 2;
        $tmpstr .= ",vectors=$vectors,mq=on";
    }

    if ($net->{bootindex}) {
        $tmpstr .= ",bootindex=$net->{bootindex}";
    }

    if (my $mtu = $net->{mtu}) {
        if ($is_virtio && $net->{bridge}) {
            my $bridge_mtu = PVE::Network::read_bridge_mtu($net->{bridge});
            if ($mtu == 1) {
                # the special value 1 means "inherit from the bridge"
                $mtu = $bridge_mtu;
            } else {
                die "netdev $netid: MTU '$mtu' is smaller than the IP minimum MTU '576'\n"
                    if $mtu < 576;
                die "netdev $netid: MTU '$mtu' is bigger than the bridge MTU '$bridge_mtu'\n"
                    if $mtu > $bridge_mtu;
            }
            $tmpstr .= ",host_mtu=$mtu";
        } else {
            warn "WARN: netdev $netid: ignoring MTU '$mtu', not using VirtIO or no bridge configured.\n";
        }
    }

    if ($use_old_bios_files) {
        my %pxe_rom_for = (
            'virtio-net-pci' => 'pxe-virtio.rom',
            'e1000' => 'pxe-e1000.rom',
            'e1000e' => 'pxe-e1000e.rom',
            'ne2k' => 'pxe-ne2k_pci.rom',
            'pcnet' => 'pxe-pcnet.rom',
            'rtl8139' => 'pxe-rtl8139.rom',
        );
        my $romfile = $pxe_rom_for{$device};
        $tmpstr .= ",romfile=$romfile" if $romfile;
    }

    return $tmpstr;
}
1777
# Build the QEMU '-netdev' string of a NIC.
#
# NICs with a bridge become tap devices named "tap<vmid>i<index>" using the
# pve-bridge (or pve-bridge-hotplug) scripts; NICs without a bridge use user
# mode networking. Dies if $netid is not a valid "net<index>" name or the
# resulting interface name is too long.
sub print_netdev_full {
    my ($vmid, $conf, $arch, $net, $netid, $hotplug) = @_;

    # reject anything that is not "net<index>" up front - an unparsable ID
    # used to slip through the range check below as an empty index
    my ($i) = $netid =~ m/^net(\d+)$/;
    die "got strange net id '$netid'\n" if !defined($i);

    $i = int($i);
    die "got strange net id '$i'\n" if $i >= $MAX_NETS;

    my $ifname = "tap${vmid}i$i";

    # kvm uses TUNSETIFF ioctl, and that limits ifname length
    die "interface name '$ifname' is too long (max 15 character)\n"
        if length($ifname) >= 16;

    my $vhostparam = '';
    if (is_native($arch)) {
        $vhostparam = ',vhost=on' if kernel_has_vhost_net() && $net->{model} eq 'virtio';
    }

    my $vmname = $conf->{name} || "vm$vmid";

    my $netdev = "";
    my $script = $hotplug ? "pve-bridge-hotplug" : "pve-bridge";

    if ($net->{bridge}) {
        $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/$script"
            .",downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam";
    } else {
        $netdev = "type=user,id=$netid,hostname=$vmname";
    }

    $netdev .= ",queues=$net->{queues}" if ($net->{queues} && $net->{model} eq 'virtio');

    return $netdev;
}
1815
# Maps the configured VGA type to the QEMU device name.
my $vga_map = {
    'cirrus' => 'cirrus-vga',
    'std' => 'VGA',
    'vmware' => 'vmware-svga',
    'virtio' => 'virtio-vga',
};

# Build the QEMU '-device' string for a display adapter.
#
# $vga is the parsed 'vga' option, $id the (optional) index of additional
# displays and $qxlnum is set when QXL devices are used. Dies if no device
# type can be determined for $vga->{type}.
sub print_vga_device {
    my ($conf, $vga, $arch, $machine_version, $machine, $id, $qxlnum, $bridges) = @_;

    my $type = $vga_map->{$vga->{type}};
    if ($arch eq 'aarch64' && defined($type) && $type eq 'virtio-vga') {
        $type = 'virtio-gpu';
    }
    my $vgamem_mb = $vga->{memory};

    my $max_outputs = '';
    if ($qxlnum) {
        $type = $id ? 'qxl' : 'qxl-vga';

        # both alternatives have to be anchored as a whole: ungrouped, '^'
        # only applied to the 'lNN' branch and '$' only to 'other'
        if (!$conf->{ostype} || $conf->{ostype} =~ m/^(?:l\d\d|other)$/) {
            # set max outputs so linux can have up to 4 qxl displays with one device
            if (min_version($machine_version, 4, 1)) {
                $max_outputs = ",max_outputs=4";
            }
        }
    }

    die "no devicetype for $vga->{type}\n" if !$type;

    my $memory = "";
    if ($vgamem_mb) {
        if ($vga->{type} eq 'virtio') {
            my $bytes = PVE::Tools::convert_size($vgamem_mb, "mb" => "b");
            $memory = ",max_hostmem=$bytes";
        } elsif ($qxlnum) {
            # from https://www.spice-space.org/multiple-monitors.html
            $memory = ",vgamem_mb=$vga->{memory}";
            my $ram = $vgamem_mb * 4;
            my $vram = $vgamem_mb * 2;
            $memory .= ",ram_size_mb=$ram,vram_size_mb=$vram";
        } else {
            $memory = ",vgamem_mb=$vga->{memory}";
        }
    } elsif ($qxlnum && $id) {
        $memory = ",ram_size=67108864,vram_size=33554432";
    }

    my $edidoff = "";
    if ($type eq 'VGA' && windows_version($conf->{ostype})) {
        $edidoff=",edid=off" if (!defined($conf->{bios}) || $conf->{bios} ne 'ovmf');
    }

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $vgaid = "vga" . ($id // '');
    my $pciaddr;
    if ($q35 && $vgaid eq 'vga') {
        # the first display uses pcie.0 bus on q35 machines
        $pciaddr = print_pcie_addr($vgaid, $bridges, $arch, $machine);
    } else {
        $pciaddr = print_pci_addr($vgaid, $bridges, $arch, $machine);
    }

    return "$type,id=${vgaid}${memory}${max_outputs}${pciaddr}${edidoff}";
}
1881
# Parse a ';' separated list of numbers and ranges ("0-3;5") into an array ref
# of [ first, last ] pairs, where last is undef for a single number. Dies on a
# malformed part or a range whose end is smaller than its start.
sub parse_number_sets {
    my ($set) = @_;

    my @ranges = map {
        my $part = $_;
        my ($first, $last) = $part =~ /^\s*(\d+)(?:-(\d+))?\s*$/
            or die "invalid range: $part\n";
        die "invalid range: $part ($last < $first)\n" if defined($last) && $last < $first;
        [ $first, $last ];
    } split(/;/, $set);

    return \@ranges;
}
1895
# Parse a numaN property string; the 'cpus' and 'hostnodes' values, if
# present, are expanded into range lists via parse_number_sets().
sub parse_numa {
    my ($data) = @_;

    my $res = parse_property_string($numa_fmt, $data);

    for my $key (qw(cpus hostnodes)) {
        next if !defined($res->{$key});
        $res->{$key} = parse_number_sets($res->{$key});
    }

    return $res;
}
1904
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
#
# Parse a netN property string. Parse errors are only warned about and yield
# nothing. A missing MAC address is filled in with a random one, generated
# with the 'mac_prefix' from datacenter.cfg.
sub parse_net {
    my ($data) = @_;

    my $res = eval { parse_property_string($net_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    $res->{macaddr} //= do {
        my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
        PVE::Tools::random_ether_addr($dc->{mac_prefix});
    };

    return $res;
}
1920
# ipconfigX ip=cidr,gw=ip,ip6=cidr,gw6=ip
#
# Parse an ipconfigN property string. Parse errors and inconsistent
# combinations (gateway without address, gateway together with dhcp/auto) are
# only warned about and yield nothing. If neither an IPv4 nor an IPv6 address
# is given, both are set to 'dhcp'.
sub parse_ipconfig {
    my ($data) = @_;

    my $res = eval { parse_property_string($ipconfig_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    # the first failing consistency check wins
    my $problem;
    if ($res->{gw} && !$res->{ip}) {
        $problem = 'gateway specified without specifying an IP address';
    } elsif ($res->{gw6} && !$res->{ip6}) {
        $problem = 'IPv6 gateway specified without specifying an IPv6 address';
    } elsif ($res->{gw} && $res->{ip} eq 'dhcp') {
        $problem = 'gateway specified together with DHCP';
    } elsif ($res->{gw6} && $res->{ip6} !~ /^$IPV6RE/) {
        # gw6 + auto/dhcp
        $problem = "IPv6 gateway specified together with $res->{ip6} address";
    }
    if (defined($problem)) {
        warn $problem;
        return;
    }

    return { ip => 'dhcp', ip6 => 'dhcp' } if !$res->{ip} && !$res->{ip6};

    return $res;
}
1955
# Serialize a parsed network device hash back into a netN property string.
sub print_net {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $net_fmt);
}
1961
# Re-serialize every netN entry of $settings (in place) after passing it
# through parse_net(). Entries that fail to parse are left untouched.
sub add_random_macs {
    my ($settings) = @_;

    for my $opt (grep { m/^net(\d+)$/ } keys %$settings) {
        my $net = parse_net($settings->{$opt}) or next;
        $settings->{$opt} = print_net($net);
    }
}
1972
# Returns 1 if the owner reported by PVE::Storage::path() for $volid equals
# $vmid. Absolute paths and any storage error yield nothing.
sub vm_is_volid_owner {
    my ($storecfg, $vmid, $volid) = @_;

    return if $volid =~ m|^/|;

    # errors are deliberately ignored, they simply mean "not the owner"
    my $owner = eval { (PVE::Storage::path($storecfg, $volid))[1] };

    return 1 if $owner && ($owner == $vmid);
    return;
}
1986
# Handle a drive that was removed from the config: cloud-init volumes are
# freed right away (errors are only warned about), other non-CD-ROM volumes
# owned by this VM are recorded as unused in $conf.
sub vmconfig_register_unused_drive {
    my ($storecfg, $vmid, $conf, $drive) = @_;

    if (drive_is_cloudinit($drive)) {
        eval { PVE::Storage::vdisk_free($storecfg, $drive->{file}) };
        if (my $err = $@) {
            warn $err;
        }
    } elsif (!drive_is_cdrom($drive)) {
        my $volid = $drive->{file};
        PVE::QemuConfig->add_unused_volume($conf, $volid, $vmid)
            if vm_is_volid_owner($storecfg, $vmid, $volid);
    }
}
2000
# smbios: [manufacturer=str][,product=str][,version=str][,serial=str][,uuid=uuid][,sku=str][,family=str][,base64=bool]
#
# All free-text values share the same schema apart from their description, so
# they are generated by a small local helper.
my $smbios1_fmt = do {
    my $text_prop = sub {
        my ($description) = @_;
        return {
            type => 'string',
            pattern => '[A-Za-z0-9+\/]+={0,2}',
            format_description => 'Base64 encoded string',
            description => $description,
            optional => 1,
        };
    };

    +{
        uuid => {
            type => 'string',
            pattern => '[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}',
            format_description => 'UUID',
            description => "Set SMBIOS1 UUID.",
            optional => 1,
        },
        version => $text_prop->("Set SMBIOS1 version."),
        serial => $text_prop->("Set SMBIOS1 serial number."),
        manufacturer => $text_prop->("Set SMBIOS1 manufacturer."),
        product => $text_prop->("Set SMBIOS1 product ID."),
        sku => $text_prop->("Set SMBIOS1 SKU string."),
        family => $text_prop->("Set SMBIOS1 family string."),
        base64 => {
            type => 'boolean',
            description => 'Flag to indicate that the SMBIOS values are base64 encoded',
            optional => 1,
        },
    };
};
2058
# Parse an 'smbios1' property string.
#
# Returns the parsed hash; on a parse error the error is emitted as a warning
# and undef is returned.
sub parse_smbios1 {
    my ($data) = @_;

    my $parsed = eval { parse_property_string($smbios1_fmt, $data) };
    if ($@) {
        warn $@;
    }

    return $parsed;
}
2066
# Serialize an smbios1 hash back into its property-string form.
sub print_smbios1 {
    my ($smbios1) = @_;

    return PVE::JSONSchema::print_property_string(
        $smbios1,
        $smbios1_fmt,
    );
}

# make the smbios1 format available under the name 'pve-qm-smbios1'
PVE::JSONSchema::register_format('pve-qm-smbios1', $smbios1_fmt);
2073
# Parse a 'watchdog' property string.
#
# Returns nothing for an empty/false value. Otherwise returns the parsed
# hash, or undef (after warning) if the string cannot be parsed.
sub parse_watchdog {
    my ($value) = @_;

    return if !$value;

    my $watchdog = eval { parse_property_string($watchdog_fmt, $value) };
    if ($@) {
        warn $@;
    }

    return $watchdog;
}
2083
# Parse the 'agent' option of a VM configuration.
#
# Returns an empty hash if the option is not set, cannot be parsed (a warning
# is emitted in that case) or the agent is not enabled; otherwise the parsed
# property hash.
sub parse_guest_agent {
    my ($conf) = @_;

    my $raw = $conf->{agent};
    return {} if !defined($raw);

    my $agent = eval { parse_property_string($agent_fmt, $raw) };
    warn $@ if $@;

    # if the agent is disabled ignore the other potentially set properties
    return $agent->{enabled} ? $agent : {};
}
2096
# Look up a single property of the guest agent configuration.
#
# Returns undef if no 'agent' option is configured, otherwise whatever
# parse_guest_agent() holds under $key (possibly undef).
sub get_qga_key {
    my ($conf, $key) = @_;

    if (!defined($conf->{agent})) {
        return undef;
    }

    my $agent_conf = parse_guest_agent($conf);
    return $agent_conf->{$key};
}
2104
# Parse a 'vga' property string.
#
# Returns an empty hash for an empty/false value. Otherwise returns the
# parsed hash, or undef (after warning) if the string cannot be parsed.
sub parse_vga {
    my ($value) = @_;

    return {} if !$value;

    my $vga = eval { parse_property_string($vga_fmt, $value) };
    if ($@) {
        warn $@;
    }

    return $vga;
}
2113
# Parse an 'rng' property string.
#
# Returns nothing for an empty/false value. Otherwise returns the parsed
# hash, or undef (after warning) if the string cannot be parsed.
sub parse_rng {
    my ($value) = @_;

    return if !$value;

    my $rng = eval { parse_property_string($rng_fmt, $value) };
    if ($@) {
        warn $@;
    }

    return $rng;
}
2123
# Parse a 'meta' property string.
#
# Returns nothing for an empty/false value. Otherwise returns the parsed
# hash, or undef (after warning) if the string cannot be parsed.
sub parse_meta_info {
    my ($value) = @_;

    return if !$value;

    my $meta = eval { parse_property_string($meta_info_fmt, $value) };
    if ($@) {
        warn $@;
    }

    return $meta;
}
2133
# Create the 'meta' property string for a new VM: records the QEMU version
# reported by kvm_user_version() and the current time (as integer string).
#
# Takes no arguments; for now it is not possible to override any value.
sub new_meta_info_string {
    my $meta = {
        'creation-qemu' => kvm_user_version(),
        ctime => "" . int(time()),
    };

    return PVE::JSONSchema::print_property_string($meta, $meta_info_fmt);
}
2145
# Return extra QEMU command line arguments needed for VMs that always use the
# latest machine version but had a machine version transition happen that
# affected HW such that, e.g., an OS config change would be required (we do
# not want to pin the machine version for non-windows OS types).
#
# Returns an array reference with the additional arguments, or nothing if no
# fixup applies.
sub qemu_created_version_fixups {
    my ($conf, $forcemachine, $kvmver) = @_;

    my $meta = parse_meta_info($conf->{meta}) // {};
    my $forced_vers = PVE::QemuServer::Machine::extract_version($forcemachine);

    # only non-versioned machine types are affected
    my $machine = $conf->{machine};
    return if defined($machine) && $machine !~ m/^(?:pc|q35|virt)$/;

    # only VMs created before 6.1 (or with unknown creation version)
    my $created_with = $meta->{'creation-qemu'};
    return if defined($created_with) && min_version($created_with, 6, 1);

    # handle snapshot-rollback/migrations
    return if $forced_vers && !min_version($forced_vers, 6, 1);

    # only need to apply the change since 6.1
    return if !min_version($kvmver, 6, 1);

    return if !PVE::QemuServer::Machine::machine_type_is_q35($conf);

    my $ostype = $conf->{ostype};
    return if !$ostype || $ostype ne 'l26';

    # this changed to default-on in Q 6.1 for q35 machines, it will mess with PCI slot view
    # and thus with the predictable interface naming of systemd
    return ['-global', 'ICH9-LPC.acpi-pci-hotplug-with-bridge-support=off'];
}
2170
PVE::JSONSchema::register_format('pve-qm-usb-device', \&verify_usb_device);

# Format verifier for 'pve-qm-usb-device'.
#
# Returns $value if parse_usb_device() accepts it. Otherwise returns nothing
# when $noerr is set, and dies when it is not.
sub verify_usb_device {
    my ($value, $noerr) = @_;

    if (parse_usb_device($value)) {
        return $value;
    }

    die "unable to parse usb device\n" if !$noerr;

    return;
}
2181
# add JSON properties for create and set function
#
# Copies every entry of $confdesc into the passed property hash, except for a
# fixed set of options that are skipped, and returns that hash.
sub json_config_properties {
    my ($prop) = @_;

    my %skipped = map { $_ => 1 } qw(
        parent
        snaptime
        vmstate
        runningmachine
        runningcpu
        meta
    );

    for my $opt (grep { !$skipped{$_} } keys %$confdesc) {
        $prop->{$opt} = $confdesc->{$opt};
    }

    return $prop;
}
2202
# return copy of $confdesc_cloudinit to generate documentation
#
# A deep copy is handed out, so callers may modify the result freely.
sub cloudinit_config_properties {
    my $copy = dclone($confdesc_cloudinit);
    return $copy;
}
2208
# Validate (and normalize) the value of a config option against the type
# declared for it in $confdesc.
#
# Returns the normalized value: 1/0 for booleans, an integer for 'integer',
# the unchanged value for 'number' and for formatted strings, and the value
# with one pair of enclosing double quotes removed for plain strings.
#
# Dies if the key is unknown, the value is undefined, contains a line break
# or does not match the declared type.
sub check_type {
    my ($key, $value) = @_;

    my $desc = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$desc;

    my $type = $desc->{type};

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    die "internal error" if $type ne 'string';

    # strings with a registered format are verified by the schema code
    if (my $fmt = $desc->{format}) {
        PVE::JSONSchema::check_format($fmt, $value);
        return $value;
    }

    # plain strings: drop enclosing double quotes
    $value =~ s/^\"(.*)\"$/$1/;
    return $value;
}
2245
# Destroy a VM: free the volumes it owns and remove (or replace) its config.
#
# Arguments:
#   $storecfg           - storage configuration
#   $vmid               - ID of the VM
#   $skiplock           - if set, the config lock is not checked
#   $replacement_conf   - if defined, written as new config instead of
#                         destroying the config
#   $purge_unreferenced - if set, every image listed by the storage layer for
#                         this VMID is freed as well
#
# Dies if the VM is a template with a base volume that is still in use.
# Failures to free individual volumes are only warned about.
sub destroy_vm {
    my ($storecfg, $vmid, $skiplock, $replacement_conf, $purge_unreferenced) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    PVE::QemuConfig->check_lock($conf) if !$skiplock;

    if ($conf->{template}) {
        # check if any base image is still used by a linked clone
        my $assert_base_unused = sub {
            my ($ds, $drive) = @_;
            return if drive_is_cdrom($drive);

            my $volid = $drive->{file};
            return if !$volid || $volid =~ m|^/|;

            die "base volume '$volid' is still in use by linked cloned\n"
                if PVE::Storage::volume_is_base_and_used($storecfg, $volid);
        };
        PVE::QemuConfig->foreach_volume_full($conf, { include_unused => 1 }, $assert_base_unused);
    }

    my $handled_volids = {}; # volumes we already tried to free
    my $remove_owned_drive = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;
        return if $handled_volids->{$volid};

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        $handled_volids->{$volid} = 1;
        eval { PVE::Storage::vdisk_free($storecfg, $volid) };
        warn "Could not remove disk '$volid', check manually: $@" if $@;
    };

    # only remove disks owned by this VM (referenced in the config)
    my $include_opts = {
        include_unused => 1,
        extra_keys => ['vmstate'],
    };
    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $remove_owned_drive);

    # vmstate volumes referenced by snapshots
    for my $snap (values %{$conf->{snapshots}}) {
        my $vmstate = $snap->{vmstate};
        next if !defined($vmstate);

        my $drive = PVE::QemuConfig->parse_volume('vmstate', $vmstate, 1);
        next if !defined($drive);

        $remove_owned_drive->('vmstate', $drive);
    }

    PVE::QemuConfig->foreach_volume_full($conf->{pending}, $include_opts, $remove_owned_drive);

    if ($purge_unreferenced) { # also remove unreferenced disk
        my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid, undef, 'images');
        my $free_volume = sub {
            my ($volid, $sid, $volname, $d) = @_;
            eval { PVE::Storage::vdisk_free($storecfg, $volid) };
            warn $@ if $@;
        };
        PVE::Storage::foreach_volid($vmdisks, $free_volume);
    }

    if (defined $replacement_conf) {
        PVE::QemuConfig->write_config($vmid, $replacement_conf);
    } else {
        PVE::QemuConfig->destroy_config($vmid);
    }
}
2316
# Parse the raw text of a VM configuration file.
#
# $filename must end in '/qemu-server/<vmid>.conf' (the VMID is taken from
# it), $raw is the file content. Returns nothing if $raw is undefined,
# otherwise a hash with the top-level options plus:
#   digest    - SHA1 hex digest of $raw
#   pending   - options of the [PENDING] section
#   snapshots - one hash of options per named section
#
# Lines starting with '#' and 'description:' lines are collected into the
# 'description' of the section they appear in. Values that cannot be parsed
# are skipped with a warning.
sub parse_vm_config {
    my ($filename, $raw) = @_;

    return if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
        pending => {},
    };

    $filename =~ m|/qemu-server/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    my $conf = $res; # hash of the section currently being filled
    my $descr;
    my $section = '';

    # store the description collected so far in the current section
    my $flush_description = sub {
        if (defined($descr)) {
            $descr =~ s/\s+$//;
            $conf->{description} = $descr;
        }
        $descr = undef;
    };

    for my $line (split(/\n/, $raw)) {
        next if $line =~ m/^\s*$/;

        # section headers
        if ($line =~ m/^\[PENDING\]\s*$/i) {
            $section = 'pending';
            $flush_description->();
            $conf = $res->{$section} = {};
            next;
        }
        if ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            $section = $1;
            $flush_description->();
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        # comment lines make up the description
        if ($line =~ m/^\#(.*)\s*$/) {
            $descr //= '';
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr //= '';
            $descr .= PVE::Tools::decode_text($2);
            next;
        }

        if ($line =~ m/snapstate:\s*(prepare|delete)\s*$/) {
            $conf->{snapstate} = $1;
            next;
        }

        # 'args' is stored verbatim, without type check
        if ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            my ($key, $value) = ($1, $2);
            $conf->{$key} = $value;
            next;
        }

        if ($line =~ m/^delete:\s*(.*\S)\s*$/) {
            my $value = $1;
            if ($section eq 'pending') {
                $conf->{delete} = $value; # we parse this later
            } else {
                warn "vm $vmid - propertry 'delete' is only allowed in [PENDING]\n";
            }
            next;
        }

        if ($line !~ m/^([a-z][a-z_]*\d*):\s*(.+?)\s*$/) {
            warn "vm $vmid - unable to parse config: $line\n";
            next;
        }

        my ($key, $value) = ($1, $2);
        eval { $value = check_type($key, $value); };
        if ($@) {
            warn "vm $vmid - unable to parse value of '$key' - $@";
            next;
        }

        $key = 'ide2' if $key eq 'cdrom';

        my $fmt = $confdesc->{$key}->{format};
        if ($fmt && $fmt =~ /^pve-qm-(?:ide|scsi|virtio|sata)$/) {
            # drive entries: replace the file by its volume ID
            my $v = parse_drive($key, $value);
            my $volid = filename_to_volume_id($vmid, $v->{file}, $v->{media});
            if (!$volid) {
                warn "vm $vmid - unable to parse value of '$key'\n";
                next;
            }
            $v->{file} = $volid;
            $value = print_drive($v);
        }

        $conf->{$key} = $value;
    }

    $flush_description->();
    delete $res->{snapstate}; # just to be sure

    return $res;
}
2419
# Serialize a VM configuration hash into the raw text of a config file.
#
# The passed $conf is normalized in place first: 'cdrom' becomes 'ide2',
# 'smp' is folded into 'sockets', every value is run through check_type() and
# 'unusedX' entries pointing at a volume that is used by a drive of the
# current or pending config are dropped.
#
# Returns the raw text: the top-level section, followed by [PENDING] (if it
# has entries) and one section per snapshot (sorted by name).
# Dies if a value fails the type check or the config is inconsistent.
sub write_vm_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    if (my $cdrom = $conf->{cdrom}) {
        die "option ide2 conflicts with cdrom\n" if $conf->{ide2};
        $conf->{ide2} = $cdrom;
        delete $conf->{cdrom};
    }

    # we do not use 'smp' any longer
    if ($conf->{sockets}) {
        delete $conf->{smp};
    } elsif (my $smp = $conf->{smp}) {
        $conf->{sockets} = $smp;
        delete $conf->{cores};
        delete $conf->{smp};
    }

    my $used_volids = {};

    # keys that hold structure/metadata instead of plain option values
    my %is_meta_key = map { $_ => 1 } qw(digest description snapshots snapstate pending);

    my $cleanup_config = sub {
        my ($cref, $pending, $snapname) = @_;

        for my $key (keys %$cref) {
            next if $is_meta_key{$key};

            if ($key eq 'delete') {
                die "propertry 'delete' is only allowed in [PENDING]\n"
                    if !$pending;
                # fixme: check syntax?
                next;
            }

            my $value = $cref->{$key};
            eval { $value = check_type($key, $value); };
            die "unable to parse value of '$key' - $@" if $@;

            $cref->{$key} = $value;

            # remember volumes used by drives (snapshots do not count)
            next if $snapname || !is_valid_drivename($key);
            my $drive = parse_drive($key, $value);
            $used_volids->{$drive->{file}} = 1 if $drive && $drive->{file};
        }
    };

    $cleanup_config->($conf);

    $cleanup_config->($conf->{pending}, 1);

    for my $snapname (keys %{$conf->{snapshots}}) {
        die "internal error: snapshot name '$snapname' is forbidden" if lc($snapname) eq 'pending';
        $cleanup_config->($conf->{snapshots}->{$snapname}, undef, $snapname);
    }

    # remove 'unusedX' settings if we re-add a volume
    for my $key (grep { m/^unused/ } keys %$conf) {
        delete $conf->{$key} if $used_volids->{$conf->{$key}};
    }

    my $generate_raw_config = sub {
        my ($section_conf, $pending) = @_;

        my @lines;

        # add description as comment to top of file
        if (defined(my $descr = $section_conf->{description})) {
            if ($descr) {
                for my $comment_line (split(/\n/, $descr)) {
                    push @lines, '#' . PVE::Tools::encode_text($comment_line);
                }
            } elsif ($pending) {
                push @lines, '#';
            }
        }

        for my $key (sort keys %$section_conf) {
            next if $key =~ /^(digest|description|pending|snapshots)$/;
            push @lines, "$key: $section_conf->{$key}";
        }

        return join('', map { "$_\n" } @lines);
    };

    my $raw = $generate_raw_config->($conf);

    if (scalar(keys %{$conf->{pending}})) {
        $raw .= "\n[PENDING]\n" . $generate_raw_config->($conf->{pending}, 1);
    }

    for my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n" . $generate_raw_config->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
2521
# Return a hash mapping every option of $confdesc that declares a default to
# that default value (we use the static defaults from our JSON schema
# configuration).
sub load_defaults {
    my $defaults = {};

    for my $key (keys %$confdesc) {
        my $default = $confdesc->{$key}->{default};
        $defaults->{$key} = $default if defined($default);
    }

    return $defaults;
}
2535
# List the VMs of type 'qemu' that the cluster VM list assigns to this node.
#
# Returns a hash reference of the form { $vmid => { exists => 1 }, ... }; it
# is empty if no VM list is available.
sub config_list {
    my $vmlist = PVE::Cluster::get_vmlist();

    my $ids = $vmlist ? $vmlist->{ids} : undef;
    return {} if !$ids;

    my $nodename = nodename();

    my @local_vmids = grep {
        my $d = $ids->{$_};
        $d->{node} && $d->{node} eq $nodename && $d->{type} && $d->{type} eq 'qemu';
    } keys %$ids;

    my $res = {};
    $res->{$_}->{exists} = 1 for @local_vmids;

    return $res;
}
2551
# test if VM uses local resources (to prevent migration)
#
# Returns an array reference with the names of all config keys considered
# local resources. Unless $noerr is set, dies if that list is not empty.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    # old syntax for hostusb/hostpci, plus ivshmem
    my @loc_res = grep { $conf->{$_} } qw(hostusb hostpci ivshmem);

    for my $k (keys %$conf) {
        if ($k =~ m/^usb/) {
            next if $conf->{$k} =~ m/^spice(?![^,])/;
        }
        if ($k =~ m/^serial/) {
            # sockets are safe: they will be recreated on the target side post-migrate
            next if $conf->{$k} eq 'socket';
        }
        push @loc_res, $k if $k =~ m/^(usb|hostpci|serial|parallel)\d+$/;
    }

    if (scalar(@loc_res) && !$noerr) {
        die "VM uses local resources\n";
    }

    return \@loc_res;
}
2574
# check if used storages are available on all nodes (use by migrate)
#
# For every volume of $conf that has a storage ID, the storage must be
# enabled locally and on $node, and the volume's content type must be
# enabled on that storage; dies otherwise.
sub check_storage_availability {
    my ($storecfg, $conf, $node) = @_;

    my $check_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($sid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        # check if storage is available on both nodes
        my $scfg = PVE::Storage::storage_check_enabled($storecfg, $sid);
        PVE::Storage::storage_check_enabled($storecfg, $sid, $node);

        my ($vtype) = PVE::Storage::parse_volname($storecfg, $volid);

        if (!$scfg->{content}->{$vtype}) {
            die "$volid: content type '$vtype' is not available on storage '$sid'\n";
        }
    };

    PVE::QemuConfig->foreach_volume($conf, $check_volume);
}
2598
# list nodes where all VM images are available (used by has_feature API)
#
# Starts with all cluster nodes and removes nodes per volume: a disabled
# storage removes every node, a storage restricted to certain nodes removes
# all others, and a storage that is not shared leaves only the local node.
# Returns a hash reference { $node => 1, ... }.
sub shared_nodes {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => 1 } @$nodelist };
    my $nodename = nodename();

    my $restrict_by_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        if ($scfg->{disable}) {
            $nodehash = {};
            return;
        }

        my @unusable;
        if (my $avail = $scfg->{nodes}) {
            @unusable = grep { !$avail->{$_} } keys %$nodehash;
        } elsif (!$scfg->{shared}) {
            @unusable = grep { $_ ne $nodename } keys %$nodehash;
        }
        delete @{$nodehash}{@unusable};
    };

    PVE::QemuConfig->foreach_volume($conf, $restrict_by_volume);

    return $nodehash;
}
2632
# Determine, per cluster node, which storages used by the VM's volumes are
# not available there (storage disabled, or restricted to other nodes).
#
# Returns a hash reference keyed by node name; nodes with missing storages
# get an 'unavailable_storages' entry holding the sorted list of storage IDs.
sub check_local_storage_availability {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => {} } @$nodelist };

    my $collect_unavailable = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        my @affected_nodes;
        if ($scfg->{disable}) {
            @affected_nodes = keys %$nodehash;
        } elsif (my $avail = $scfg->{nodes}) {
            @affected_nodes = grep { !$avail->{$_} } keys %$nodehash;
        }

        for my $node (@affected_nodes) {
            $nodehash->{$node}->{unavailable_storages}->{$storeid} = 1;
        }
    };

    PVE::QemuConfig->foreach_volume($conf, $collect_unavailable);

    # turn the collected sets into sorted lists
    for my $info (values %$nodehash) {
        my $unavail = $info->{unavailable_storages};
        next if !$unavail;
        $info->{unavailable_storages} = [ sort keys %$unavail ];
    }

    return $nodehash;
}
2671
# Compat only, use assert_config_exists_on_node and vm_running_locally where possible
#
# Unless $nocheck is set, first asserts that the config of $vmid exists on
# $node; then returns the result of vm_running_locally($vmid).
sub check_running {
    my ($vmid, $nocheck, $node) = @_;

    if (!$nocheck) {
        PVE::QemuConfig::assert_config_exists_on_node($vmid, $node);
    }

    return PVE::QemuServer::Helpers::vm_running_locally($vmid);
}
2679
# Return the list of local VMs (see config_list) where each VM found running
# by check_running() additionally carries a 'pid' entry.
#
# Only VMs with a '<vmid>.pid' file in the run directory are checked. If that
# directory cannot be opened the plain config list is returned.
sub vzlist {
    my $vzlist = config_list();

    my $run_dir = IO::Dir->new($PVE::QemuServer::Helpers::var_run_tmpdir);
    return $vzlist if !$run_dir;

    while (defined(my $entry = $run_dir->read)) {
        my ($vmid) = $entry =~ m/^(\d+)\.pid$/;
        next if !defined($vmid);
        next if !defined($vzlist->{$vmid});

        my $pid = check_running($vmid);
        $vzlist->{$vmid}->{pid} = $pid if $pid;
    }

    return $vzlist;
}
2697
# Schema describing the properties of a VM status entry (cf. vmstatus()).
our $vmstatus_return_properties = {
    vmid => get_standard_option('pve-vmid'),
    status => {
        type => 'string',
        enum => ['stopped', 'running'],
        description => "Qemu process status.",
    },
    maxmem => {
        type => 'integer',
        renderer => 'bytes',
        optional => 1,
        description => "Maximum memory in bytes.",
    },
    maxdisk => {
        type => 'integer',
        renderer => 'bytes',
        optional => 1,
        description => "Root disk size in bytes.",
    },
    name => {
        type => 'string',
        optional => 1,
        description => "VM name.",
    },
    qmpstatus => {
        type => 'string',
        optional => 1,
        description => "Qemu QMP agent status.",
    },
    pid => {
        type => 'integer',
        optional => 1,
        description => "PID of running qemu process.",
    },
    uptime => {
        type => 'integer',
        renderer => 'duration',
        optional => 1,
        description => "Uptime.",
    },
    cpus => {
        type => 'number',
        optional => 1,
        description => "Maximum usable CPUs.",
    },
    lock => {
        type => 'string',
        optional => 1,
        description => "The current config lock, if any.",
    },
    tags => {
        type => 'string',
        optional => 1,
        description => "The current configured tags, if any",
    },
    'running-machine' => {
        type => 'string',
        optional => 1,
        description => "The currently running machine type (if running).",
    },
    'running-qemu' => {
        type => 'string',
        optional => 1,
        description => "The currently running QEMU version (if running).",
    },
};
2764
# per-PID CPU accounting sample (time, used, cpu) kept between vmstatus() calls
my $last_proc_pid_stat;

# get VM status information
# This must be fast and should not block ($full == false)
# We only query KVM using QMP if $full == true (this can be slow)
#
# Arguments:
#   $opt_vmid - if set, only this VM is reported
#   $full     - if set, additional information is queried from the running
#               QEMU processes via QMP
#
# Returns a hash reference mapping each VMID to its status hash.
sub vmstatus {
    my ($opt_vmid, $full) = @_;

    my $res = {};

    my $storecfg = PVE::Storage::config();

    my $list = vzlist();
    my $defaults = load_defaults();

    my ($uptime) = PVE::ProcFSTools::read_proc_uptime(1);

    my $cpucount = $cpuinfo->{cpus} || 1;
    my $mib = 1024 * 1024;

    # static information taken from the configuration
    for my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);

        my $conf = PVE::QemuConfig->load_config($vmid);
        my $pid = $list->{$vmid}->{pid};

        my $d = { vmid => int($vmid) };
        $d->{pid} = int($pid) if $pid;

        # fixme: better status?
        $d->{status} = $pid ? 'running' : 'stopped';

        $d->{disk} = 0; # no info available
        $d->{maxdisk} = PVE::QemuServer::Drive::bootdisk_size($storecfg, $conf) // 0;

        my $cpus = ($conf->{sockets} || $defaults->{sockets})
            * ($conf->{cores} || $defaults->{cores});
        $cpus = $cpucount if $cpus > $cpucount;
        $cpus = $conf->{vcpus} if $conf->{vcpus};
        $d->{cpus} = $cpus;

        $d->{name} = $conf->{name} || "VM $vmid";
        $d->{maxmem} = ($conf->{memory} || $defaults->{memory}) * $mib;

        if (my $balloon = $conf->{balloon}) {
            $d->{balloon_min} = $balloon * $mib;
            $d->{shares} = $conf->{shares} // $defaults->{shares};
        }

        # counters start at zero and are filled in below where available
        for my $counter (qw(uptime cpu mem netout netin diskread diskwrite)) {
            $d->{$counter} = 0;
        }

        $d->{template} = 1 if PVE::QemuConfig->is_template($conf);

        $d->{serial} = 1 if conf_has_serial($conf);
        $d->{lock} = $conf->{lock} if $conf->{lock};
        $d->{tags} = $conf->{tags} if defined($conf->{tags});

        $res->{$vmid} = $d;
    }

    # network counters of the tap devices
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    for my $dev (keys %$netdev) {
        my ($vmid) = $dev =~ m/^tap([1-9]\d*)i/;
        next if !defined($vmid);

        my $d = $res->{$vmid};
        next if !$d;

        # note the swap: the device's 'receive' counter is accounted as 'netout'
        my $counters = $netdev->{$dev};
        $d->{netout} += $counters->{receive};
        $d->{netin} += $counters->{transmit};

        if ($full) {
            $d->{nics}->{$dev} = {
                netout => int($counters->{receive}),
                netin => int($counters->{transmit}),
            };
        }
    }

    my $ctime = gettimeofday();

    # uptime, memory and CPU usage from /proc
    for my $vmid (keys %$list) {
        my $d = $res->{$vmid};
        my $pid = $d->{pid};
        next if !$pid;

        my $pstat = PVE::ProcFSTools::read_proc_pid_stat($pid);
        next if !$pstat; # not running

        my $used = $pstat->{utime} + $pstat->{stime};

        $d->{uptime} = int(($uptime - $pstat->{starttime}) / $cpuinfo->{user_hz});

        if ($pstat->{vsize}) {
            $d->{mem} = int(($pstat->{rss} / $pstat->{vsize}) * $d->{maxmem});
        }

        my $old = $last_proc_pid_stat->{$pid};
        if (!$old) {
            # first sample for this PID, no usage can be computed yet
            $last_proc_pid_stat->{$pid} = { time => $ctime, used => $used, cpu => 0 };
            next;
        }

        my $dtime = ($ctime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        if ($dtime <= 1000) {
            # too little time passed, report the previous value
            $d->{cpu} = $old->{cpu};
            next;
        }

        my $dutime = $used - $old->{used};

        $d->{cpu} = (($dutime / $dtime) * $cpucount) / $d->{cpus};
        $last_proc_pid_stat->{$pid} = { time => $ctime, used => $used, cpu => $d->{cpu} };
    }

    return $res if !$full;

    my $qmpclient = PVE::QMPClient->new();

    my $ballooncb = sub {
        my ($vmid, $resp) = @_;

        my $info = $resp->{'return'};
        return if !$info->{max_mem};

        my $d = $res->{$vmid};

        # use memory assigned to VM
        $d->{maxmem} = $info->{max_mem};
        $d->{balloon} = $info->{actual};

        my ($total, $free) = @{$info}{qw(total_mem free_mem)};
        if (defined($total) && defined($free)) {
            $d->{mem} = $total - $free;
            $d->{freemem} = $free;
        }

        $d->{ballooninfo} = $info;
    };

    my $blockstatscb = sub {
        my ($vmid, $resp) = @_;

        my ($read_total, $write_total) = (0, 0);

        for my $blockstat (@{ $resp->{'return'} || [] }) {
            $read_total += $blockstat->{stats}->{rd_bytes};
            $write_total += $blockstat->{stats}->{wr_bytes};

            $blockstat->{device} =~ s/drive-//;
            $res->{$vmid}->{blockstat}->{$blockstat->{device}} = $blockstat->{stats};
        }

        $res->{$vmid}->{diskread} = $read_total;
        $res->{$vmid}->{diskwrite} = $write_total;
    };

    my $machinecb = sub {
        my ($vmid, $resp) = @_;

        my $machines = $resp->{'return'} || [];
        $res->{$vmid}->{'running-machine'} =
            PVE::QemuServer::Machine::current_from_query_machines($machines);
    };

    my $versioncb = sub {
        my ($vmid, $resp) = @_;

        my $data = $resp->{'return'} // {};
        my $v = $data->{qemu};

        $res->{$vmid}->{'running-qemu'} = $v
            ? $v->{major} . "." . $v->{minor} . "." . $v->{micro}
            : 'unknown';
    };

    my $statuscb = sub {
        my ($vmid, $resp) = @_;

        $qmpclient->queue_cmd($vmid, $blockstatscb, 'query-blockstats');
        $qmpclient->queue_cmd($vmid, $machinecb, 'query-machines');
        $qmpclient->queue_cmd($vmid, $versioncb, 'query-version');
        # this fails if balloon driver is not loaded, so this must be
        # the last command (following commands are aborted if this fails).
        $qmpclient->queue_cmd($vmid, $ballooncb, 'query-balloon');

        my $status = $resp->{'return'}->{status};
        if (!defined($status)) {
            warn "unable to get VM status\n";
            return;
        }

        $res->{$vmid}->{qmpstatus} = $status;
    };

    my @selected = grep { !$opt_vmid || $_ eq $opt_vmid } keys %$list;
    my @running = grep { $res->{$_}->{pid} } @selected;

    for my $vmid (@running) {
        $qmpclient->queue_cmd($vmid, $statuscb, 'query-status');
    }

    $qmpclient->queue_execute(undef, 2);

    for my $vmid (@running) {
        # we can't use the $qmpclient since it might have already aborted on
        # 'query-balloon', but this might also fail for older versions...
        my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };
        $res->{$vmid}->{'proxmox-support'} = $qemu_support // {};
    }

    # fall back to the process status where QMP gave no answer
    for my $vmid (@selected) {
        my $d = $res->{$vmid};
        $d->{qmpstatus} = $d->{status} if !$d->{qmpstatus};
    }

    return $res;
}
3005
# Return 1 if any of the serialN options (N below $MAX_SERIAL_PORTS) is set
# in the configuration, 0 otherwise.
sub conf_has_serial {
    my ($conf) = @_;

    for my $port (0 .. $MAX_SERIAL_PORTS - 1) {
        return 1 if $conf->{"serial$port"};
    }

    return 0;
}
3017
# Describe the audio device configured as "audio$id" ($id defaults to 0).
#
# Returns nothing if no such option is set. Otherwise returns a hash with the
# configured device ('dev'), its ID ('dev_id'), the backend driver ('backend',
# 'spice' unless configured) and the backend ID ('backend_id').
sub conf_has_audio {
    my ($conf, $id) = @_;

    $id //= 0;

    my $raw = $conf->{"audio$id"};
    return if !defined($raw);

    my $props = parse_property_string($audio_fmt, $raw);
    my $driver = $props->{driver} // 'spice';

    return {
        dev => $props->{device},
        dev_id => "audiodev$id",
        backend => $driver,
        backend_id => "$driver-backend${id}",
    };
}
3035
# Build the QEMU command line arguments for an audio device.
#
# $audio is a hash as returned by conf_has_audio(), $audiopciaddr the PCI
# address suffix for the device. The device is only linked to its backend
# (audiodev=...) for machine versions of at least 4.2.
#
# Returns an array reference of arguments; dies for unknown device types.
sub audio_devs {
    my ($audio, $audiopciaddr, $machine_version) = @_;

    my $id = $audio->{dev_id};
    my $device = $audio->{dev};

    my $audiodev = min_version($machine_version, 4, 2)
        ? ",audiodev=$audio->{backend_id}"
        : "";

    my @devs;
    if ($device eq 'AC97') {
        @devs = ('-device', "AC97,id=${id}${audiopciaddr}$audiodev");
    } elsif ($device =~ /intel\-hda$/) {
        @devs = (
            '-device', "$device,id=${id}${audiopciaddr}",
            '-device', "hda-micro,id=${id}-codec0,bus=${id}.0,cad=0$audiodev",
            '-device', "hda-duplex,id=${id}-codec1,bus=${id}.0,cad=1$audiodev",
        );
    } else {
        die "unkown audio device '$device', implement me!";
    }

    push @devs, '-audiodev', "$audio->{backend},id=$audio->{backend_id}";

    return \@devs;
}
3061
# Return the paths used for the swtpm instance of a VM: the control 'socket'
# and the 'pid' file.
sub get_tpm_paths {
    my ($vmid) = @_;

    my $base = "/var/run/qemu-server/$vmid.swtpm";

    return {
        socket => $base,
        pid => "$base.pid",
    };
}
3069
# Append the QEMU arguments for the TPM device to @$devices.
#
# Does nothing unless the config has a 'tpmstate0' entry. The device is
# connected to the socket path returned by get_tpm_paths().
sub add_tpm_device {
    my ($vmid, $devices, $conf) = @_;

    return if !$conf->{tpmstate0};

    my $socket = get_tpm_paths($vmid)->{socket};

    push @$devices,
        "-chardev", "socket,id=tpmchar,path=$socket",
        "-tpmdev", "emulator,id=tpmdev,chardev=tpmchar",
        "-device", "tpm-tis,tpmdev=tpmdev";
}
3081
# Start the swtpm daemon providing the TPM of a VM.
#
# Arguments:
#   $storecfg  - storage configuration
#   $vmid      - ID of the VM
#   $tpmdrive  - value of the 'tpmstate0' option; nothing is done if unset
#   $migration - if set, no TPM state is set up (during migration, we will
#                get the state from remote)
#
# Returns the untainted PID of the swtpm daemon so it can be killed on error,
# or nothing if no TPM is configured or no PID could be read from the pid
# file. Dies if the pid file does not appear in time.
sub start_swtpm {
    my ($storecfg, $vmid, $tpmdrive, $migration) = @_;

    return if !$tpmdrive;

    my $state;
    my $tpm = parse_drive("tpmstate0", $tpmdrive);
    my ($storeid, $volname) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
    if ($storeid) {
        $state = PVE::Storage::map_volume($storecfg, $tpm->{file});
    } else {
        $state = $tpm->{file};
    }

    my $is_tpm2 = defined($tpm->{version}) && $tpm->{version} eq 'v2.0';

    my $paths = get_tpm_paths($vmid);

    # during migration, we will get state from remote
    if (!$migration) {
        # run swtpm_setup to create a new TPM state if it doesn't exist yet
        my $setup_cmd = [
            "swtpm_setup",
            "--tpmstate",
            "file://$state",
            "--createek",
            "--create-ek-cert",
            "--create-platform-cert",
            "--lock-nvram",
            "--config",
            "/etc/swtpm_setup.conf", # do not use XDG configs
            "--runas",
            "0", # force creation as root, error if not possible
            "--not-overwrite", # ignore existing state, do not modify
        ];

        push @$setup_cmd, "--tpm2" if $is_tpm2;
        # TPM 2.0 supports ECC crypto, use if possible
        push @$setup_cmd, "--ecc" if $is_tpm2;

        # print the line handed to the callback, not whatever capture
        # variable happens to be set by the code invoking us
        run_command($setup_cmd, outfunc => sub {
            my ($line) = @_;
            print "swtpm_setup: $line\n";
        });
    }

    my $emulator_cmd = [
        "swtpm",
        "socket",
        "--tpmstate",
        "backend-uri=file://$state,mode=0600",
        "--ctrl",
        "type=unixio,path=$paths->{socket},mode=0600",
        "--pid",
        "file=$paths->{pid}",
        "--terminate", # terminate on QEMU disconnect
        "--daemon",
    ];
    push @$emulator_cmd, "--tpm2" if $is_tpm2;
    run_command($emulator_cmd, outfunc => sub {
        my ($line) = @_;
        print $line;
    });

    my $tries = 100; # swtpm may take a bit to start before daemonizing, wait up to 5s for pid
    while (! -e $paths->{pid}) {
        die "failed to start swtpm: pid file '$paths->{pid}' wasn't created.\n" if --$tries == 0;
        usleep(50_000);
    }

    # return untainted PID of swtpm daemon so it can be killed on error; never
    # hand out a stale capture if the pid file holds no number (yet)
    my $pid_line = file_read_firstline($paths->{pid}) // '';
    if ($pid_line =~ m/(\d+)/) {
        return $1;
    }

    warn "unable to read PID of swtpm from '$paths->{pid}'\n";
    return;
}
3151
# Check whether a 'vga' option selects a SPICE (qxl) display.
#
# Returns 0 if the type is not qxl. Otherwise returns the digit of the
# qxl2/qxl3/qxl4 types, or 1 for plain 'qxl'.
sub vga_conf_has_spice {
    my ($vga) = @_;

    my $vgaconf = parse_vga($vga);
    my $vgatype = $vgaconf->{type};

    return 0 if !$vgatype;

    if ($vgatype =~ m/^qxl([234])?$/) {
        return $1 || 1;
    }

    return 0;
}
3161
# Returns true if $arch equals the architecture reported by get_host_arch().
sub is_native($) {
    my ($arch) = @_;

    my $host_arch = get_host_arch();
    return $host_arch eq $arch;
}
3166
# Returns the architecture set in the VM config ($conf->{arch}) or, if none is
# set, the one reported by get_host_arch().
sub get_vm_arch {
    my ($conf) = @_;

    my $configured = $conf->{arch};
    return $configured if defined($configured);

    return scalar(get_host_arch());
}
3171
# Machine type to use per guest architecture when nothing more specific is
# requested.
my $default_machines = {
    aarch64 => 'virt',
    x86_64 => 'pc',
};
3176
# Extract the "<major>.<minor>" prefix of a QEMU version string.
#
# $kvmversion - version string such as '6.1.0'; if undefined, the result of
#               kvm_user_version() is used instead.
#
# Returns the leading "<major>.<minor>" part, or undef if the string does not
# start with one.
sub get_installed_machine_version {
    my ($kvmversion) = @_;
    $kvmversion = kvm_user_version() if !defined($kvmversion);

    # Take the capture from the match itself instead of reading $1 afterwards:
    # when the match fails, $1 still holds the capture of an earlier successful
    # match, which would be returned as a bogus version.
    my ($major_minor) = $kvmversion =~ m/^(\d+\.\d+)/;

    return $major_minor;
}
3183
# Turn an unversioned machine type into a version-pinned one.
#
# $machine      - 'pc', 'q35', 'virt' or a false value (treated like 'pc')
# $base_version - preferred version to pin to
# $kvmversion   - QEMU version, passed on to the version helpers
#
# The machine is pinned to $base_version, unless that is undefined or
# can_run_pve_machine_version() rejects it for $kvmversion; in that case the
# result of get_installed_machine_version($kvmversion) is used.
#
# Returns the pinned machine type. Any other machine type triggers a warning
# and is returned unchanged.
sub windows_get_pinned_machine_version {
    my ($machine, $base_version, $kvmversion) = @_;

    my $base_usable = defined($base_version)
        && PVE::QemuServer::Machine::can_run_pve_machine_version($base_version, $kvmversion);
    my $pin_version = $base_usable
        ? $base_version
        : get_installed_machine_version($kvmversion);

    my $versioned_prefix = {
        pc => 'pc-i440fx',
        q35 => 'pc-q35',
        virt => 'virt',
    };

    my $prefix = $versioned_prefix->{ $machine || 'pc' };
    return "$prefix-$pin_version" if defined($prefix);

    warn "unknown machine type '$machine', not touching that!\n";
    return $machine;
}
3205
# Determine the machine type string for a VM.
#
# $conf            - VM configuration (reads 'machine' and 'ostype')
# $forcemachine    - takes precedence over $conf->{machine} when set
# $arch            - guest architecture, defaults to 'x86_64' for the default machine lookup
# $add_pve_version - if true, make sure the result carries a '+pveN' suffix
# $kvmversion      - QEMU version, defaults to kvm_user_version() when needed
#
# Returns the machine type, e.g. 'pc-q35-5.1+pve0'.
sub get_vm_machine {
    my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_;

    my $machine = $forcemachine || $conf->{machine};

    my $is_unversioned = !$machine || $machine =~ m/^(?:pc|q35|virt)$/;
    if ($is_unversioned) {
        $kvmversion //= kvm_user_version();

        # Windows VMs without a specific version must be pinned to 5.1, as 5.2 fixed a bug in
        # the ACPI layout which confuses Windows quite a bit and may result in various
        # regressions, see:
        # https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg08484.html
        if (windows_version($conf->{ostype})) {
            $machine = windows_get_pinned_machine_version($machine, '5.1', $kvmversion);
        }

        $arch //= 'x86_64';
        $machine ||= $default_machines->{$arch};

        if ($add_pve_version) {
            $machine .= '+pve' . PVE::QemuServer::Machine::get_pve_version($kvmversion);
        }
    }

    return $machine if !$add_pve_version;
    return $machine if $machine =~ m/\+pve\d+?(?:\.pxe)?$/;

    # Version-pinned machines that do not include a pve-version (e.g. pc-q35-4.1) are assumed
    # to be at pve0, to keep them stable in case the pve-version gets bumped. A '.pxe' suffix
    # has to stay at the very end.
    my $pxe_suffix = '';
    if ($machine =~ m/^(.*?)\.pxe$/) {
        $machine = $1;
        $pxe_suffix = '.pxe';
    }

    return "${machine}+pve0${pxe_suffix}";
}
3240
# Look up the OVMF firmware images to use for a VM.
#
# $arch    - guest architecture, used as key into $OVMF
# $efidisk - parsed efidisk drive (may be undef); 'efitype' and 'pre-enrolled-keys' are read
# $smm     - whether the variant with SMM support should be selected for the '4m' type
#
# The image type is 'default', unless the EFI disk is of type '4m': then it is '4m' or
# '4m-no-smm' depending on $smm, with '-ms' appended for pre-enrolled keys.
#
# Returns the list stored in $OVMF for that architecture and type; the caller in
# config_to_command() unpacks it as ($ovmf_code, $ovmf_vars).
# Dies if no images are known for the architecture or for the selected type.
sub get_ovmf_files($$$) {
    my ($arch, $efidisk, $smm) = @_;

    my $types = $OVMF->{$arch}
        or die "no OVMF images known for architecture '$arch'\n";

    my $type = 'default';
    if (defined($efidisk->{efitype}) && $efidisk->{efitype} eq '4m') {
        $type = $smm ? "4m" : "4m-no-smm";
        $type .= '-ms' if $efidisk->{'pre-enrolled-keys'};
    }

    # fail with a meaningful message instead of dereferencing undef below
    my $files = $types->{$type}
        or die "no OVMF images of type '$type' known for architecture '$arch'\n";

    return $files->@*;
}
3255
# Emulator binary per guest architecture, used when the guest is not native to the host.
my $Arch2Qemu = {
    x86_64 => '/usr/bin/qemu-system-x86_64',
    aarch64 => '/usr/bin/qemu-system-aarch64',
};

# Returns the path of the binary to run a guest of architecture $arch with:
# '/usr/bin/kvm' if is_native($arch), the matching $Arch2Qemu entry otherwise.
# Dies if the architecture is neither native nor listed in $Arch2Qemu.
sub get_command_for_arch($) {
    my ($arch) = @_;

    if (is_native($arch)) {
        return '/usr/bin/kvm';
    }

    my $binary = $Arch2Qemu->{$arch};
    die "don't know how to emulate architecture '$arch'\n" if !$binary;

    return $binary;
}
3268
# How to combine query_supported_cpu_flags and query_understood_cpu_flags to get
# flags for a QEMU command line (-cpu element): array_intersect the result of
# query_supported_ with the one of query_understood_. This is required because:
#
# a) query_understood_ returns flags the host cannot use and
# b) query_supported_ (rather the QMP call) doesn't actually return CPU
#    flags, but CPU settings - with most of them being flags. Those settings
#    (and some flags, curiously) cannot be specified as a "-cpu" argument.
#
# query_supported_ has to start up to 2 temporary VMs, which makes it rather
# expensive. The value it returns is available much cheaper from pmxcfs, using
# PVE::Cluster::get_node_kv('cpuflags-$accel') with $accel being 'kvm' or 'tcg'.
#
# pvestatd calls this function on startup and whenever the QEMU/KVM version
# changes, automatically populating pmxcfs.
#
# Returns: { kvm => [ flagX, flagY, ... ], tcg => [ flag1, flag2, ... ] }
# since kvm and tcg machines support different flags
#
sub query_supported_cpu_flags {
    my ($arch) = @_;

    $arch //= get_host_arch();
    my $default_machine = $default_machines->{$arch};

    # FIXME: Once this is merged, the code below should work for ARM as well:
    # https://lists.nongnu.org/archive/html/qemu-devel/2019-06/msg04947.html
    die "QEMU/KVM cannot detect CPU flags on ARM (aarch64)\n" if $arch eq "aarch64";

    my $kvm_supported = defined(kvm_version());
    my $qemu_cmd = get_command_for_arch($arch);
    my $fakevmid = -1;
    my $pidfile = PVE::QemuServer::Helpers::pidfile_name($fakevmid);

    # Starts a temporary (frozen) VM with vmid -1, asks it via QMP for the expansion of the
    # 'host' CPU model and returns the sorted names of all properties that are set to '1'.
    my $query_supported_run_qemu = sub {
        my ($kvm) = @_;

        my @qemu_args = (
            $qemu_cmd,
            '-machine', $default_machine,
            '-display', 'none',
            '-chardev', "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server=on,wait=off",
            '-mon', 'chardev=qmp,mode=control',
            '-pidfile', $pidfile,
            '-S', '-daemonize',
        );
        push @qemu_args, '-accel', 'tcg' if !$kvm;

        my $rc = run_command(\@qemu_args, noerr => 1, quiet => 0);
        die "QEMU flag querying VM exited with code " . $rc if $rc;

        my $enabled = {};
        eval {
            my $expansion = mon_cmd(
                $fakevmid,
                'query-cpu-model-expansion',
                type => 'full',
                model => { name => 'host' }
            );

            my $props = $expansion->{model}->{props};
            for my $name (keys %$props) {
                next if $props->{$name} ne '1';
                # QEMU returns some flags multiple times, with '_', '.' or '-'
                # (e.g. lahf_lm and lahf-lm; sse4.2, sse4-2 and sse4_2; ...).
                # Only the underscore variant is kept, to match /proc/cpuinfo
                my $normalized = $name =~ s/[.-]/_/gr;
                $enabled->{$normalized} = 1;
            }
        };
        my $err = $@;

        # always stop the VM, even if QMP failed: force stop with 10 sec timeout and 'nocheck'
        vm_stop(undef, $fakevmid, 1, 1, 10, 0, 1);

        die $err if $err;

        return [ sort keys %$enabled ];
    };

    my $flags = {};

    # QEMU has to be queried twice, since KVM and TCG have different supported flags
    PVE::QemuConfig->lock_config($fakevmid, sub {
        $flags->{tcg} = eval { $query_supported_run_qemu->(0) };
        warn "warning: failed querying supported tcg flags: $@\n" if $@;

        if ($kvm_supported) {
            $flags->{kvm} = eval { $query_supported_run_qemu->(1) };
            warn "warning: failed querying supported kvm flags: $@\n" if $@;
        }
    });

    return $flags;
}
3370
# The CPU flags QEMU understands are written to a file in this directory when 'pve-qemu' is
# compiled
my $understood_cpu_flag_dir = "/usr/share/kvm";

# Reads the whitespace-separated list of understood CPU flags for the host architecture.
#
# Returns a reference to an array of flag names.
# Dies if the file for the host architecture does not exist.
sub query_understood_cpu_flags {
    my $arch = get_host_arch();
    my $filepath = "$understood_cpu_flag_dir/recognized-CPUID-flags-$arch";

    if (! -e $filepath) {
        die "Cannot query understood QEMU CPU flags for architecture: $arch (file not found)\n";
    }

    my $content = file_get_contents($filepath);
    # strip surrounding whitespace, so that splitting yields no empty leading element
    $content =~ s/^\s+|\s+$//g;

    return [ split(/\s+/, $content) ];
}
3386
# Returns the 'cpuunits' value of the VM config. If it is not set, the result is 100 when
# PVE::CGroup::cgroup_mode() reports 2 and 1024 otherwise.
my sub get_cpuunits {
    my ($conf) = @_;

    my $configured = $conf->{cpuunits};
    return $configured if defined($configured);

    return PVE::CGroup::cgroup_mode() == 2 ? 100 : 1024;
}
3391
# SMM is no longer off by default since pve-qemu commit
# 277d33454f77ec1d1e0bc04e37621e4dd2424b67, but smm=off seems to be required when SeaBIOS is
# combined with a serial display.
#
# Returns true if the VM uses SeaBIOS (explicitly, or because no bios is configured) and the
# display type is a serial device or 'none'.
my sub should_disable_smm {
    my ($conf, $vga) = @_;

    my $bios = $conf->{bios};
    my $uses_seabios = !defined($bios) || $bios eq 'seabios';

    return $uses_seabios && $vga->{type} && $vga->{type} =~ m/^(serial\d+|none)$/;
}
3400
# Build the QEMU command line for a VM from its configuration.
#
# Parameters:
#   $storecfg     - storage configuration, handed to the PVE::Storage helpers
#   $vmid         - ID of the VM
#   $conf         - VM configuration hash
#   $defaults     - fallback values for options not set in $conf ('tablet', 'tdf', 'scsihw')
#   $forcemachine - machine type taking precedence over the configured one
#   $forcecpu     - if set, used verbatim as the '-cpu' argument
#   $pbs_backing  - hash keyed by drive name; a drive with an entry gets an additional
#                   '-blockdev' generated by print_pbs_blockdev()
#
# Returns ($cmd, $vollist, $spice_port) in list context and only $cmd in scalar context:
#   $cmd        - array reference with the binary followed by all arguments
#   $vollist    - array reference with the storage volumes used by the VM
#   $spice_port - port reserved for SPICE, undef if no qxl display is configured
#
# Dies if the installed QEMU is too old for the machine type, if KVM is requested but not
# available, if more vCPUs are configured than the node allows, or if a configured
# serial/parallel device or EFI image is missing.
sub config_to_command {
    my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu,
        $pbs_backing) = @_;

    my $cmd = [];
    my ($globalFlags, $machineFlags, $rtcFlags) = ([], [], []);
    my $devices = [];
    my $bridges = {};
    my $ostype = $conf->{ostype};
    my $winversion = windows_version($ostype);
    my $kvm = $conf->{kvm};
    my $nodename = nodename();

    my $arch = get_vm_arch($conf);
    my $kvm_binary = get_command_for_arch($arch);
    my $kvmver = kvm_user_version($kvm_binary);

    if (!$kvmver || $kvmver !~ m/^(\d+)\.(\d+)/ || $1 < 3) {
        $kvmver //= "undefined";
        die "Detected old QEMU binary ('$kvmver', at least 3.0 is required)\n";
    }

    my $add_pve_version = min_version($kvmver, 4, 1);

    my $machine_type = get_vm_machine($conf, $forcemachine, $arch, $add_pve_version);
    my $machine_version = extract_version($machine_type, $kvmver);
    $kvm //= 1 if is_native($arch);

    $machine_version =~ m/(\d+)\.(\d+)/;
    my ($machine_major, $machine_minor) = ($1, $2);

    if ($kvmver =~ m/^\d+\.\d+\.(\d+)/ && $1 >= 90) {
        warn "warning: Installed QEMU version ($kvmver) is a release candidate, ignoring version checks\n";
    } elsif (!min_version($kvmver, $machine_major, $machine_minor)) {
        die "Installed QEMU version '$kvmver' is too old to run machine type '$machine_type',"
            ." please upgrade node '$nodename'\n";
    } elsif (!PVE::QemuServer::Machine::can_run_pve_machine_version($machine_version, $kvmver)) {
        my $max_pve_version = PVE::QemuServer::Machine::get_pve_version($machine_version);
        die "Installed qemu-server (max feature level for $machine_major.$machine_minor is"
            ." pve$max_pve_version) is too old to run machine type '$machine_type', please upgrade"
            ." node '$nodename'\n";
    }

    # if a specific +pve version is required for a feature, use $version_guard
    # instead of min_version to allow machines to be run with the minimum
    # required version
    my $required_pve_version = 0;
    my $version_guard = sub {
        my ($major, $minor, $pve) = @_;
        return 0 if !min_version($machine_version, $major, $minor, $pve);
        my $max_pve = PVE::QemuServer::Machine::get_pve_version("$major.$minor");
        return 1 if min_version($machine_version, $major, $minor, $max_pve+1);
        $required_pve_version = $pve if $pve && $pve > $required_pve_version;
        return 1;
    };

    if ($kvm && !defined kvm_version()) {
        die "KVM virtualisation configured, but not available. Either disable in VM configuration"
            ." or enable in BIOS.\n";
    }

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
    my $use_old_bios_files = undef;
    ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);

    my $cpuunits = get_cpuunits($conf);

    push @$cmd, $kvm_binary;

    push @$cmd, '-id', $vmid;

    my $vmname = $conf->{name} || "vm$vmid";

    push @$cmd, '-name', $vmname;

    push @$cmd, '-no-shutdown';

    my $use_virtio = 0;

    my $qmpsocket = PVE::QemuServer::Helpers::qmp_socket($vmid);
    push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server=on,wait=off";
    push @$cmd, '-mon', "chardev=qmp,mode=control";

    if (min_version($machine_version, 2, 12)) {
        push @$cmd, '-chardev', "socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5";
        push @$cmd, '-mon', "chardev=qmp-event,mode=control";
    }

    push @$cmd, '-pidfile' , PVE::QemuServer::Helpers::pidfile_name($vmid);

    push @$cmd, '-daemonize';

    if ($conf->{smbios1}) {
        my $smbios_conf = parse_smbios1($conf->{smbios1});
        if ($smbios_conf->{base64}) {
            # Do not pass base64 flag to qemu
            delete $smbios_conf->{base64};
            my $smbios_string = "";
            foreach my $key (keys %$smbios_conf) {
                my $value;
                if ($key eq "uuid") {
                    $value = $smbios_conf->{uuid}
                } else {
                    $value = decode_base64($smbios_conf->{$key});
                }
                # qemu accepts any binary data, only commas need escaping by double comma
                $value =~ s/,/,,/g;
                # only skip empty values - a plain truthiness test would also drop "0"
                $smbios_string .= "," . $key . "=" . $value if length($value);
            }
            push @$cmd, '-smbios', "type=1" . $smbios_string;
        } else {
            push @$cmd, '-smbios', "type=1,$conf->{smbios1}";
        }
    }

    if ($conf->{bios} && $conf->{bios} eq 'ovmf') {
        my $d;
        if (my $efidisk = $conf->{efidisk0}) {
            $d = parse_drive('efidisk0', $efidisk);
        }

        my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch, $d, $q35);
        die "uefi base image '$ovmf_code' not found\n" if ! -f $ovmf_code;

        my ($path, $format);
        my $read_only_str = '';
        if ($d) {
            my ($storeid, $volname) = PVE::Storage::parse_volume_id($d->{file}, 1);
            $format = $d->{format};
            if ($storeid) {
                $path = PVE::Storage::path($storecfg, $d->{file});
                if (!defined($format)) {
                    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
                    $format = qemu_img_format($scfg, $volname);
                }
            } else {
                $path = $d->{file};
                die "efidisk format must be specified\n"
                    if !defined($format);
            }

            $read_only_str = ',readonly=on' if drive_is_read_only($conf, $d);
        } else {
            warn "no efidisk configured! Using temporary efivars disk.\n";
            $path = "/tmp/$vmid-ovmf.fd";
            PVE::Tools::file_copy($ovmf_vars, $path, -s $ovmf_vars);
            $format = 'raw';
        }

        my $size_str = "";

        if ($format eq 'raw' && $version_guard->(4, 1, 2)) {
            $size_str = ",size=" . (-s $ovmf_vars);
        }

        # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
        my $cache = "";
        if ($path =~ m/^rbd:/) {
            $cache = ',cache=writeback';
            $path .= ':rbd_cache_policy=writeback'; # avoid write-around, we *need* to cache writes too
        }

        push @$cmd, '-drive', "if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code";
        push @$cmd, '-drive', "if=pflash,unit=1$cache,format=$format,id=drive-efidisk0$size_str,file=${path}${read_only_str}";
    }

    if ($q35) { # tell QEMU to load q35 config early
        # we use different pcie-port hardware for qemu >= 4.0 for passthrough
        if (min_version($machine_version, 4, 0)) {
            push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35-4.0.cfg';
        } else {
            push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35.cfg';
        }
    }

    if (defined(my $fixups = qemu_created_version_fixups($conf, $forcemachine, $kvmver))) {
        push @$cmd, $fixups->@*;
    }

    if ($conf->{vmgenid}) {
        push @$devices, '-device', 'vmgenid,guid='.$conf->{vmgenid};
    }

    # add usb controllers
    my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers(
        $conf, $bridges, $arch, $machine_type, $usbdesc->{format}, $MAX_USB_DEVICES);
    push @$devices, @usbcontrollers if @usbcontrollers;
    my $vga = parse_vga($conf->{vga});

    my $qxlnum = vga_conf_has_spice($conf->{vga});
    $vga->{type} = 'qxl' if $qxlnum;

    if (!$vga->{type}) {
        if ($arch eq 'aarch64') {
            $vga->{type} = 'virtio';
        } elsif (min_version($machine_version, 2, 9)) {
            $vga->{type} = (!$winversion || $winversion >= 6) ? 'std' : 'cirrus';
        } else {
            $vga->{type} = ($winversion >= 6) ? 'std' : 'cirrus';
        }
    }

    # enable absolute mouse coordinates (needed by vnc)
    my $tablet = $conf->{tablet};
    if (!defined($tablet)) {
        $tablet = $defaults->{tablet};
        $tablet = 0 if $qxlnum; # disable for spice because it is not needed
        $tablet = 0 if $vga->{type} =~ m/^serial\d+$/; # disable if we use serial terminal (no vga card)
    }

    if ($tablet) {
        push @$devices, '-device', print_tabletdevice_full($conf, $arch);
        my $kbd = print_keyboarddevice_full($conf, $arch);
        push @$devices, '-device', $kbd if defined($kbd);
    }

    my $bootorder = device_bootorder($conf);

    # host pci device passthrough
    my ($kvm_off, $gpu_passthrough, $legacy_igd) = PVE::QemuServer::PCI::print_hostpci_devices(
        $vmid, $conf, $devices, $vga, $winversion, $q35, $bridges, $arch, $machine_type, $bootorder);

    # usb devices
    my $usb_dev_features = {};
    $usb_dev_features->{spice_usb3} = 1 if min_version($machine_version, 4, 0);

    my @usbdevices = PVE::QemuServer::USB::get_usb_devices(
        $conf, $usbdesc->{format}, $MAX_USB_DEVICES, $usb_dev_features, $bootorder);
    push @$devices, @usbdevices if @usbdevices;

    # serial devices
    for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
        my $path = $conf->{"serial$i"} or next;
        if ($path eq 'socket') {
            my $socket = "/var/run/qemu-server/${vmid}.serial$i";
            push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server=on,wait=off";
            # On aarch64, serial0 is the UART device. Qemu only allows
            # connecting UART devices via the '-serial' command line, as
            # the device has a fixed slot on the hardware...
            if ($arch eq 'aarch64' && $i == 0) {
                push @$devices, '-serial', "chardev:serial$i";
            } else {
                push @$devices, '-device', "isa-serial,chardev=serial$i";
            }
        } else {
            die "no such serial device\n" if ! -c $path;
            push @$devices, '-chardev', "tty,id=serial$i,path=$path";
            push @$devices, '-device', "isa-serial,chardev=serial$i";
        }
    }

    # parallel devices
    for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
        if (my $path = $conf->{"parallel$i"}) {
            die "no such parallel device\n" if ! -c $path;
            my $devtype = $path =~ m!^/dev/usb/lp! ? 'tty' : 'parport';
            push @$devices, '-chardev', "$devtype,id=parallel$i,path=$path";
            push @$devices, '-device', "isa-parallel,chardev=parallel$i";
        }
    }

    if (min_version($machine_version, 4, 0) && (my $audio = conf_has_audio($conf))) {
        my $audiopciaddr = print_pci_addr("audio0", $bridges, $arch, $machine_type);
        my $audio_devs = audio_devs($audio, $audiopciaddr, $machine_version);
        push @$devices, @$audio_devs;
    }

    add_tpm_device($vmid, $devices, $conf);

    my $sockets = 1;
    $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer used
    $sockets = $conf->{sockets} if $conf->{sockets};

    my $cores = $conf->{cores} || 1;

    my $maxcpus = $sockets * $cores;

    my $vcpus = $conf->{vcpus} ? $conf->{vcpus} : $maxcpus;

    my $allowed_vcpus = $cpuinfo->{cpus};

    die "MAX $allowed_vcpus vcpus allowed per VM on this node\n" if ($allowed_vcpus < $maxcpus);

    if ($hotplug_features->{cpu} && min_version($machine_version, 2, 7)) {
        # start with a single vCPU and add the remaining ones as separate devices
        push @$cmd, '-smp', "1,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
        for (my $i = 2; $i <= $vcpus; $i++) {
            my $cpustr = print_cpu_device($conf,$i);
            push @$cmd, '-device', $cpustr;
        }

    } else {

        push @$cmd, '-smp', "$vcpus,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
    }
    push @$cmd, '-nodefaults';

    push @$cmd, '-boot', "menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg";

    push @$cmd, '-no-acpi' if defined($conf->{acpi}) && $conf->{acpi} == 0;

    push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;

    if ($vga->{type} && $vga->{type} !~ m/^serial\d+$/ && $vga->{type} ne 'none'){
        push @$devices, '-device', print_vga_device(
            $conf, $vga, $arch, $machine_version, $machine_type, undef, $qxlnum, $bridges);
        my $socket = PVE::QemuServer::Helpers::vnc_socket($vmid);
        push @$cmd, '-vnc', "unix:$socket,password=on";
    } else {
        push @$cmd, '-vga', 'none' if $vga->{type} eq 'none';
        push @$cmd, '-nographic';
    }

    # time drift fix
    my $tdf = defined($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};
    my $useLocaltime = $conf->{localtime};

    if ($winversion >= 5) { # windows
        $useLocaltime = 1 if !defined($conf->{localtime});

        # use time drift fix when acpi is enabled
        if (!(defined($conf->{acpi}) && $conf->{acpi} == 0)) {
            $tdf = 1 if !defined($conf->{tdf});
        }
    }

    if ($winversion >= 6) {
        push @$globalFlags, 'kvm-pit.lost_tick_policy=discard';
        push @$cmd, '-no-hpet';
    }

    push @$rtcFlags, 'driftfix=slew' if $tdf;

    if ($conf->{startdate} && $conf->{startdate} ne 'now') {
        push @$rtcFlags, "base=$conf->{startdate}";
    } elsif ($useLocaltime) {
        push @$rtcFlags, 'base=localtime';
    }

    if ($forcecpu) {
        push @$cmd, '-cpu', $forcecpu;
    } else {
        push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
    }

    PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd);

    push @$cmd, '-S' if $conf->{freeze};

    push @$cmd, '-k', $conf->{keyboard} if defined($conf->{keyboard});

    my $guest_agent = parse_guest_agent($conf);

    if ($guest_agent->{enabled}) {
        my $qgasocket = PVE::QemuServer::Helpers::qmp_socket($vmid, 1);
        push @$devices, '-chardev', "socket,path=$qgasocket,server=on,wait=off,id=qga0";

        if (!$guest_agent->{type} || $guest_agent->{type} eq 'virtio') {
            my $pciaddr = print_pci_addr("qga0", $bridges, $arch, $machine_type);
            push @$devices, '-device', "virtio-serial,id=qga0$pciaddr";
            push @$devices, '-device', 'virtserialport,chardev=qga0,name=org.qemu.guest_agent.0';
        } elsif ($guest_agent->{type} eq 'isa') {
            push @$devices, '-device', "isa-serial,chardev=qga0";
        }
    }

    my $rng = $conf->{rng0} ? parse_rng($conf->{rng0}) : undef;
    if ($rng && $version_guard->(4, 1, 2)) {
        check_rng_source($rng->{source});

        my $max_bytes = $rng->{max_bytes} // $rng_fmt->{max_bytes}->{default};
        my $period = $rng->{period} // $rng_fmt->{period}->{default};
        my $limiter_str = "";
        if ($max_bytes) {
            $limiter_str = ",max-bytes=$max_bytes,period=$period";
        }

        my $rng_addr = print_pci_addr("rng0", $bridges, $arch, $machine_type);
        push @$devices, '-object', "rng-random,filename=$rng->{source},id=rng0";
        push @$devices, '-device', "virtio-rng-pci,rng=rng0$limiter_str$rng_addr";
    }

    my $spice_port;

    if ($qxlnum) {
        if ($qxlnum > 1) {
            if ($winversion){
                for (my $i = 1; $i < $qxlnum; $i++){
                    push @$devices, '-device', print_vga_device(
                        $conf, $vga, $arch, $machine_version, $machine_type, $i, $qxlnum, $bridges);
                }
            } else {
                # assume other OS works like Linux
                my ($ram, $vram) = ("134217728", "67108864");
                if ($vga->{memory}) {
                    $ram = PVE::Tools::convert_size($qxlnum*4*$vga->{memory}, 'mb' => 'b');
                    $vram = PVE::Tools::convert_size($qxlnum*2*$vga->{memory}, 'mb' => 'b');
                }
                push @$cmd, '-global', "qxl-vga.ram_size=$ram";
                push @$cmd, '-global', "qxl-vga.vram_size=$vram";
            }
        }

        my $pciaddr = print_pci_addr("spice", $bridges, $arch, $machine_type);

        my $pfamily = PVE::Tools::get_host_address_family($nodename);
        my @nodeaddrs = PVE::Tools::getaddrinfo_all('localhost', family => $pfamily);
        die "failed to get an ip address of type $pfamily for 'localhost'\n" if !@nodeaddrs;

        push @$devices, '-device', "virtio-serial,id=spice$pciaddr";
        push @$devices, '-chardev', "spicevmc,id=vdagent,name=vdagent";
        push @$devices, '-device', "virtserialport,chardev=vdagent,name=com.redhat.spice.0";

        my $localhost = PVE::Network::addr_to_ip($nodeaddrs[0]->{addr});
        $spice_port = PVE::Tools::next_spice_port($pfamily, $localhost);

        my $spice_enhancement_str = $conf->{spice_enhancements} // '';
        my $spice_enhancement = parse_property_string($spice_enhancements_fmt, $spice_enhancement_str);
        if ($spice_enhancement->{foldersharing}) {
            push @$devices, '-chardev', "spiceport,id=foldershare,name=org.spice-space.webdav.0";
            push @$devices, '-device', "virtserialport,chardev=foldershare,name=org.spice-space.webdav.0";
        }

        my $spice_opts = "tls-port=${spice_port},addr=$localhost,tls-ciphers=HIGH,seamless-migration=on";
        $spice_opts .= ",streaming-video=$spice_enhancement->{videostreaming}"
            if $spice_enhancement->{videostreaming};

        push @$devices, '-spice', "$spice_opts";
    }

    # enable balloon by default, unless explicitly disabled
    if (!defined($conf->{balloon}) || $conf->{balloon}) {
        my $pciaddr = print_pci_addr("balloon0", $bridges, $arch, $machine_type);
        push @$devices, '-device', "virtio-balloon-pci,id=balloon0$pciaddr";
    }

    if ($conf->{watchdog}) {
        my $wdopts = parse_watchdog($conf->{watchdog});
        my $pciaddr = print_pci_addr("watchdog", $bridges, $arch, $machine_type);
        my $watchdog = $wdopts->{model} || 'i6300esb';
        push @$devices, '-device', "$watchdog$pciaddr";
        push @$devices, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
    }

    my $vollist = [];
    my $scsicontroller = {};
    my $ahcicontroller = {};
    my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : $defaults->{scsihw};

    # Add iscsi initiator name if available
    if (my $initiator = get_initiator_name()) {
        push @$devices, '-iscsi', "initiator-name=$initiator";
    }

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        if (PVE::Storage::parse_volume_id($drive->{file}, 1)) {
            check_volume_storage_type($storecfg, $drive->{file});
            push @$vollist, $drive->{file};
        }

        # ignore efidisk here, already added in bios/fw handling code above
        return if $drive->{interface} eq 'efidisk';
        # similar for TPM
        return if $drive->{interface} eq 'tpmstate';

        $use_virtio = 1 if $ds =~ m/^virtio/;

        $drive->{bootindex} = $bootorder->{$ds} if $bootorder->{$ds};

        if ($drive->{interface} eq 'virtio'){
            push @$cmd, '-object', "iothread,id=iothread-$ds" if $drive->{iothread};
        }

        if ($drive->{interface} eq 'scsi') {

            my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);

            die "scsi$drive->{index}: machine version 4.1~pve2 or higher is required to use more than 14 SCSI disks\n"
                if $drive->{index} > 13 && !$version_guard->(4, 1, 2);

            my $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges, $arch, $machine_type);
            my $scsihw_type = $scsihw =~ m/^virtio-scsi-single/ ? "virtio-scsi-pci" : $scsihw;

            my $iothread = '';
            if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{iothread}){
                $iothread .= ",iothread=iothread-$controller_prefix$controller";
                push @$cmd, '-object', "iothread,id=iothread-$controller_prefix$controller";
            } elsif ($drive->{iothread}) {
                warn "iothread is only valid with virtio disk or virtio-scsi-single controller, ignoring\n";
            }

            my $queues = '';
            if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{queues}){
                $queues = ",num_queues=$drive->{queues}";
            }

            # each controller is only added once, for the first drive that uses it
            push @$devices, '-device', "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues"
                if !$scsicontroller->{$controller};
            $scsicontroller->{$controller}=1;
        }

        if ($drive->{interface} eq 'sata') {
            my $controller = int($drive->{index} / $PVE::QemuServer::Drive::MAX_SATA_DISKS);
            my $pciaddr = print_pci_addr("ahci$controller", $bridges, $arch, $machine_type);
            push @$devices, '-device', "ahci,id=ahci$controller,multifunction=on$pciaddr"
                if !$ahcicontroller->{$controller};
            $ahcicontroller->{$controller}=1;
        }

        my $pbs_conf = $pbs_backing->{$ds};
        my $pbs_name = undef;
        if ($pbs_conf) {
            $pbs_name = "drive-$ds-pbs";
            push @$devices, '-blockdev', print_pbs_blockdev($pbs_conf, $pbs_name);
        }

        my $drive_cmd = print_drive_commandline_full(
            $storecfg, $vmid, $drive, $pbs_name, min_version($kvmver, 6, 0));

        # extra protection for templates, but SATA and IDE don't support it..
        $drive_cmd .= ',readonly=on' if drive_is_read_only($conf, $drive);

        push @$devices, '-drive',$drive_cmd;
        push @$devices, '-device', print_drivedevice_full(
            $storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type);
    });

    for (my $i = 0; $i < $MAX_NETS; $i++) {
        my $netname = "net$i";

        next if !$conf->{$netname};
        my $d = parse_net($conf->{$netname});
        next if !$d;

        $use_virtio = 1 if $d->{model} eq 'virtio';

        $d->{bootindex} = $bootorder->{$netname} if $bootorder->{$netname};

        my $netdevfull = print_netdev_full($vmid, $conf, $arch, $d, $netname);
        push @$devices, '-netdev', $netdevfull;

        my $netdevicefull = print_netdevice_full(
            $vmid, $conf, $d, $netname, $bridges, $use_old_bios_files, $arch, $machine_type);

        push @$devices, '-device', $netdevicefull;
    }

    if ($conf->{ivshmem}) {
        my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});

        my $bus;
        if ($q35) {
            $bus = print_pcie_addr("ivshmem");
        } else {
            $bus = print_pci_addr("ivshmem", $bridges, $arch, $machine_type);
        }

        my $ivshmem_name = $ivshmem->{name} // $vmid;
        my $path = '/dev/shm/pve-shm-' . $ivshmem_name;

        push @$devices, '-device', "ivshmem-plain,memdev=ivshmem$bus,";
        push @$devices, '-object', "memory-backend-file,id=ivshmem,share=on,mem-path=$path"
            .",size=$ivshmem->{size}M";
    }

    # pci.4 is nested in pci.1
    $bridges->{1} = 1 if $bridges->{4};

    if (!$q35) { # add pci bridges
        if (min_version($machine_version, 2, 3)) {
            $bridges->{1} = 1;
            $bridges->{2} = 1;
        }
        $bridges->{3} = 1 if $scsihw =~ m/^virtio-scsi-single/;
    }

    # bridge IDs are numbers, so order them numerically (highest first)
    for my $k (sort { $b <=> $a } keys %$bridges) {
        next if $q35 && $k < 4; # q35.cfg already includes bridges up to 3

        my $k_name = $k;
        if ($k == 2 && $legacy_igd) {
            $k_name = "$k-igd";
        }
        my $pciaddr = print_pci_addr("pci.$k_name", undef, $arch, $machine_type);
        my $devstr = "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr";

        if ($q35) { # add after -readconfig pve-q35.cfg
            splice @$devices, 2, 0, '-device', $devstr;
        } else {
            unshift @$devices, '-device', $devstr if $k > 0;
        }
    }

    if (!$kvm) {
        push @$machineFlags, 'accel=tcg';
    }

    push @$machineFlags, 'smm=off' if should_disable_smm($conf, $vga);

    # only request the lowest +pve version that the features in use actually need
    my $machine_type_min = $machine_type;
    if ($add_pve_version) {
        $machine_type_min =~ s/\+pve\d+$//;
        $machine_type_min .= "+pve$required_pve_version";
    }
    push @$machineFlags, "type=${machine_type_min}";

    push @$cmd, @$devices;
    push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
    push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
    push @$cmd, '-global', join(',', @$globalFlags) if scalar(@$globalFlags);

    if (my $vmstate = $conf->{vmstate}) {
        my $statepath = PVE::Storage::path($storecfg, $vmstate);
        push @$vollist, $vmstate;
        push @$cmd, '-loadstate', $statepath;
        print "activating and using '$vmstate' as vmstate\n";
    }

    if (PVE::QemuConfig->is_template($conf)) {
        # needed to workaround base volumes being read-only
        push @$cmd, '-snapshot';
    }

    # add custom args
    if ($conf->{args}) {
        my $aa = PVE::Tools::split_args($conf->{args});
        push @$cmd, @$aa;
    }

    return wantarray ? ($cmd, $vollist, $spice_port) : $cmd;
}
4034
# Validate the host-side source file of a VirtIO RNG device.
#
# Dies if $source does not exist, or if $source is '/dev/hwrng' while the kernel's
# 'rng_current' sysfs entry reads 'none'.
sub check_rng_source {
    my ($source) = @_;

    # mostly relevant for /dev/hwrng, but doesn't hurt to check others too
    if (!-e $source) {
        die "cannot create VirtIO RNG device: source file '$source' doesn't exist\n";
    }

    my $rng_current = '/sys/devices/virtual/misc/hw_random/rng_current';
    if ($source eq '/dev/hwrng') {
        if (file_read_firstline($rng_current) eq 'none') {
            # Needs to abort, otherwise QEMU crashes on first rng access. Note that
            # rng_current cannot be changed to 'none' manually, so once the VM is past
            # this point, it's no longer an issue.
            die join('',
                "Cannot start VM with passed-through RNG device: '/dev/hwrng' exists, but",
                " '$rng_current' is set to 'none'. Ensure that a compatible hardware-RNG is attached",
                " to the host.\n",
            );
        }
    }
}
4051
# Return the SPICE port of the running VM as reported by 'query-spice'.
# The TLS port is preferred over the plain port; dies if neither is set.
sub spice_port {
    my ($vmid) = @_;

    my $spice_info = mon_cmd($vmid, 'query-spice');

    my $port = $spice_info->{'tls-port'} || $spice_info->{'port'};
    die "no spice port\n" if !$port;

    return $port;
}
4059
# Build a hash of device ids currently present in the running VM.
#
# The result is assembled from four monitor queries: 'query-pci' (qdev ids, descending
# into PCI bridges), 'query-block' (names following the 'drive-' prefix), 'query-mice'
# (sets the 'tablet' key) and 'qom-list' on /machine/peripheral (usbN entries).
sub vm_devices_list {
    my ($vmid) = @_;

    my $devices = {};

    # breadth-first walk over all PCI devices, bridges included
    my $pci_buses = mon_cmd($vmid, 'query-pci');
    my @queue = map { @{$_->{devices}} } @$pci_buses;

    while (@queue) {
        my $dev = shift @queue;
        my $qdev_id = $dev->{'qdev_id'};

        $devices->{$qdev_id} = 1 if $qdev_id;

        my $bridge = $dev->{'pci_bridge'};
        next if !$bridge;

        # a bridge's entry additionally counts the devices behind it
        $devices->{$dev->{'qdev_id'}} += scalar(@{$bridge->{devices}});
        push @queue, @{$bridge->{devices}};
    }

    my $block_devices = mon_cmd($vmid, 'query-block');
    for my $block (@$block_devices) {
        $devices->{$1} = 1 if $block->{device} =~ m/^drive-(\S+)/;
    }

    my $mice = mon_cmd($vmid, 'query-mice');
    for my $mouse (@$mice) {
        next if $mouse->{name} ne 'QEMU HID Tablet';
        $devices->{tablet} = 1;
        last;
    }

    # for usb devices there is no query-usb
    # but we can iterate over the entries in
    # qom-list path=/machine/peripheral
    my $peripherals = mon_cmd($vmid, 'qom-list', path => '/machine/peripheral');
    for my $peripheral (@$peripherals) {
        my $name = $peripheral->{name};
        $devices->{$name} = 1 if $name =~ m/^usb\d+$/;
    }

    return $devices;
}
4109
# Hot-plug the device $deviceid into the running VM $vmid.
#
# Parameters:
#   $storecfg      - storage configuration, passed on to the drive helpers
#   $conf          - VM configuration hash
#   $vmid          - id of the running VM
#   $deviceid      - device id, e.g. 'tablet', 'virtio0', 'scsi1', 'net0', 'pci.1'
#   $device        - parsed device description (drive/net hash); not used for
#                    tablet, keyboard and pci bridge ids
#   $arch          - guest architecture
#   $machine_type  - machine type, used for PCI address calculation
#
# Returns 1 if the device is already present or was plugged. Returns an empty value if
# qemu_netdevadd() reports failure for a net device. Dies for unsupported device ids
# (including usb, which is deliberately disabled) and on hotplug errors.
#
# For drives and net devices the backend (drive/netdev) is added first; if adding or
# verifying the frontend device fails afterwards, the backend is removed again so that
# no stale drive/netdev is left behind.
sub vm_deviceplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    my $devices_list = vm_devices_list($vmid);
    return 1 if defined($devices_list->{$deviceid});

    # add PCI bridge if we need it for the device
    qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid, $arch, $machine_type);

    if ($deviceid eq 'tablet') {
        qemu_deviceadd($vmid, print_tabletdevice_full($conf, $arch));
    } elsif ($deviceid eq 'keyboard') {
        qemu_deviceadd($vmid, print_keyboarddevice_full($conf, $arch));
    } elsif ($deviceid =~ m/^usb(\d+)$/) {
        die "usb hotplug currently not reliable\n";
        # since we can't reliably hot unplug all added usb devices and usb
        # passthrough breaks live migration we disable usb hotplugging for now
        #qemu_deviceadd($vmid, PVE::QemuServer::USB::print_usbdevice_full($conf, $deviceid, $device));
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        qemu_iothread_add($vmid, $deviceid, $device);

        qemu_driveadd($storecfg, $vmid, $device);
        my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);

        # device_add itself is guarded too, so a failing add does not leak the drive
        eval {
            qemu_deviceadd($vmid, $devicefull);
            qemu_deviceaddverify($vmid, $deviceid);
        };
        if (my $err = $@) {
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : "lsi";
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $scsihw_type = $scsihw eq 'virtio-scsi-single' ? "virtio-scsi-pci" : $scsihw;

        my $devicefull = "$scsihw_type,id=$deviceid$pciaddr";

        if ($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{iothread}) {
            qemu_iothread_add($vmid, $deviceid, $device);
            $devicefull .= ",iothread=iothread-$deviceid";
        }

        if ($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{queues}) {
            $devicefull .= ",num_queues=$device->{queues}";
        }

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        qemu_findorcreatescsihw($storecfg, $conf, $vmid, $device, $arch, $machine_type);
        qemu_driveadd($storecfg, $vmid, $device);

        my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);
        eval { qemu_deviceadd($vmid, $devicefull); };
        if (my $err = $@) {
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        return if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid);

        # deliberately shadows the parameter: the net device string is built for the
        # machine type returned by qemu_machine_pxe()
        my $machine_type = PVE::QemuServer::Machine::qemu_machine_pxe($vmid, $conf);
        my $use_old_bios_files = undef;
        ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);

        my $netdevicefull = print_netdevice_full(
            $vmid, $conf, $device, $deviceid, undef, $use_old_bios_files, $arch, $machine_type);

        # device_add itself is guarded too, so a failing add does not leak the netdev
        eval {
            qemu_deviceadd($vmid, $netdevicefull);
            qemu_deviceaddverify($vmid, $deviceid);
            qemu_set_link_status($vmid, $deviceid, !$device->{link_down});
        };
        if (my $err = $@) {
            eval { qemu_netdevdel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif (!$q35 && $deviceid =~ m/^(pci\.)(\d+)$/) {
        my $bridgeid = $2;
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $devicefull = "pci-bridge,id=pci.$bridgeid,chassis_nr=$bridgeid$pciaddr";

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);
    } else {
        die "can't hotplug device '$deviceid'\n";
    }

    return 1;
}
4204
# Hot-unplug the device $deviceid from the running VM $vmid.
#
# Returns 1 if the device is not present or was unplugged. Dies for boot disks, for usb
# devices (deliberately disabled) and for unsupported device ids.
#
# fixme: this should raise exceptions on error!
sub vm_deviceunplug {
    my ($vmid, $conf, $deviceid) = @_;

    my $plugged = vm_devices_list($vmid);
    return 1 if !defined($plugged->{$deviceid});

    my $bootdisks = PVE::QemuServer::Drive::get_bootdisks($conf);
    for my $bootdisk (@$bootdisks) {
        die "can't unplug bootdisk '$deviceid'\n" if $bootdisk eq $deviceid;
    }

    if ($deviceid eq 'tablet' || $deviceid eq 'keyboard') {
        qemu_devicedel($vmid, $deviceid);
    } elsif ($deviceid =~ m/^usb\d+$/) {
        die "usb hotplug currently not reliable\n";
        # when unplugging usb devices this way, there may be remaining usb
        # controllers/hubs so we disable it for now
        #qemu_devicedel($vmid, $deviceid);
        #qemu_devicedelverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^virtio\d+$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        # frontend first, then the backing drive, then its iothread object
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_iothread_del($vmid, $deviceid, $drive);
    } elsif ($deviceid =~ m/^(?:virtioscsi|scsihw)\d+$/) {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^scsi\d+$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        qemu_devicedel($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_deletescsihw($conf, $vmid, $deviceid);

        my $single_controller = $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
        if ($single_controller) {
            qemu_iothread_del($vmid, "virtioscsi$drive->{index}", $drive);
        }
    } elsif ($deviceid =~ m/^net\d+$/) {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_netdevdel($vmid, $deviceid);
    } else {
        die "can't unplug device '$deviceid'\n";
    }

    return 1;
}
4252
# Issue a 'device_add' monitor command for a device given in command-line notation
# ("driver,key=value,...").
#
# The string is prefixed with "driver=" and then split on '=' and ',' into the option
# list, so values containing either character are not supported.
sub qemu_deviceadd {
    my ($vmid, $devicefull) = @_;

    my %options = split(/[=,]/, "driver=" . $devicefull);

    mon_cmd($vmid, "device_add", %options);
}
4261
# Issue a 'device_del' monitor command for $deviceid and return its result.
sub qemu_devicedel {
    my ($vmid, $deviceid) = @_;

    my $result = mon_cmd($vmid, "device_del", id => $deviceid);
    return $result;
}
4267
# Create the 'iothread-$deviceid' object if $device requests an iothread and the
# object is not already listed by vm_iothreads_list().
sub qemu_iothread_add {
    my ($vmid, $deviceid, $device) = @_;

    return if !$device->{iothread};

    my $iothread_id = "iothread-$deviceid";
    my $existing = vm_iothreads_list($vmid);
    qemu_objectadd($vmid, $iothread_id, "iothread") if !$existing->{$iothread_id};
}
4276
# Remove the 'iothread-$deviceid' object if $device uses an iothread and the object is
# listed by vm_iothreads_list().
sub qemu_iothread_del {
    my ($vmid, $deviceid, $device) = @_;

    return if !$device->{iothread};

    my $iothread_id = "iothread-$deviceid";
    my $existing = vm_iothreads_list($vmid);
    qemu_objectdel($vmid, $iothread_id) if $existing->{$iothread_id};
}
4285
# Add a QOM object of type $qomtype with id $objectid via 'object-add'. Returns 1.
sub qemu_objectadd {
    my ($vmid, $objectid, $qomtype) = @_;

    my %args = (
        id => $objectid,
        "qom-type" => $qomtype,
    );
    mon_cmd($vmid, "object-add", %args);

    return 1;
}
4293
# Delete the QOM object with id $objectid via 'object-del'. Returns 1.
sub qemu_objectdel {
    my ($vmid, $objectid) = @_;

    my %args = (id => $objectid);
    mon_cmd($vmid, "object-del", %args);

    return 1;
}
4301
# Add the drive backend for $device to the running VM using the HMP 'drive_add'
# command. Returns 1 on success, dies with the monitor output otherwise.
sub qemu_driveadd {
    my ($storecfg, $vmid, $device) = @_;

    my $running_version = get_running_qemu_version($vmid);
    my $io_uring = min_version($running_version, 6, 0);

    my $drive = print_drive_commandline_full($storecfg, $vmid, $device, undef, $io_uring);
    # the drive string is embedded in a quoted HMP argument, so double every backslash
    (my $escaped = $drive) =~ s/\\/\\\\/g;

    my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_add auto \"$escaped\"");

    # If the command succeeds qemu prints: "OK"
    die "adding drive failed: $output\n" if $output !~ m/OK/s;

    return 1;
}
4316
# Remove the drive backend 'drive-$deviceid' using the HMP 'drive_del' command.
# Returns 1 on success (empty output, or a "not found" message); dies otherwise.
sub qemu_drivedel {
    my ($vmid, $deviceid) = @_;

    my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_del drive-$deviceid");
    $output =~ s/^\s+//;

    # NB: device not found errors mean the drive was auto-deleted and we ignore the error
    my $failed = $output ne "" && $output !~ m/Device \'.*?\' not found/s;
    die "deleting drive $deviceid failed : $output\n" if $failed;

    return 1;
}
4330
# Poll the device list until $deviceid shows up. Checks six times, sleeping one second
# after each unsuccessful check; dies if the device never appears.
sub qemu_deviceaddverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if defined($present->{$deviceid});
        sleep 1;
    }

    die "error on hotplug device '$deviceid'\n";
}
4342
4343
# Poll the device list until $deviceid is gone. Checks six times, sleeping one second
# after each unsuccessful check; dies if the device is still present afterwards.
sub qemu_devicedelverify {
    my ($vmid, $deviceid) = @_;

    # need to verify that the device is correctly removed as device_del
    # is async and empty return is not reliable
    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if !defined($present->{$deviceid});
        sleep 1;
    }

    die "error on hot-unplugging device '$deviceid'\n";
}
4358
# Make sure the SCSI controller that $device belongs to (as determined by
# scsihw_infos()) is present in the running VM, hot-plugging it if necessary.
# Always returns 1.
sub qemu_findorcreatescsihw {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $device);
    my $controller_id = $controller_prefix . $controller;

    my $present = vm_devices_list($vmid);
    return 1 if defined($present->{$controller_id});

    vm_deviceplug($storecfg, $conf, $vmid, $controller_id, $device, $arch, $machine_type);

    return 1;
}
4373
# Unplug the SCSI controller belonging to drive option $opt once it is no longer needed.
#
# With 'virtio-scsi-single' the drive's own controller ("virtioscsi<index>") is always
# unplugged. Otherwise the shared "scsihw<controller>" is only unplugged if no plugged
# SCSI drive has an index below (maxdev - 1) * (controller + 1). Always returns 1.
sub qemu_deletescsihw {
    my ($conf, $vmid, $opt) = @_;

    my $device = parse_drive($opt, $conf->{$opt});

    my $single_controller = $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
    if ($single_controller) {
        vm_deviceunplug($vmid, $conf, "virtioscsi$device->{index}");
        return 1;
    }

    my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $device);
    my $index_limit = ($maxdev - 1) * ($controller + 1);

    my $present = vm_devices_list($vmid);
    for my $name (keys %$present) {
        next if !is_valid_drivename($name);

        my $drive = parse_drive($name, $conf->{$name});
        # controller still in use, keep it
        return 1 if $drive->{interface} eq 'scsi' && $drive->{index} < $index_limit;
    }

    vm_deviceunplug($vmid, $conf, "scsihw$controller");

    return 1;
}
4402
# Hot-plug the PCI bridge required by $device, if any.
#
# print_pci_addr() is called with an empty $bridges hash so that it records which bridge
# the device's address lives on. If that is bridge 1 or higher and it is not yet present
# in the running VM, the bridge ("pci.<id>") is plugged via vm_deviceplug().
# Always returns 1.
sub qemu_add_pci_bridge {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my $bridges = {};

    my $bridgeid;

    print_pci_addr($device, $bridges, $arch, $machine_type);

    while (my ($k, $v) = each %$bridges) {
        $bridgeid = $k;
    }
    return 1 if !defined($bridgeid) || $bridgeid < 1;

    my $bridge = "pci.$bridgeid";
    my $devices_list = vm_devices_list($vmid);

    if (!defined($devices_list->{$bridge})) {
        # vm_deviceplug() takes ($storecfg, $conf, $vmid, $deviceid, $device, $arch,
        # $machine_type); a bridge has no device description, so pass undef in that
        # slot to keep $arch and $machine_type in their proper positions.
        vm_deviceplug($storecfg, $conf, $vmid, $bridge, undef, $arch, $machine_type);
    }

    return 1;
}
4426
# Set the link state of network device $device via the 'set_link' monitor command;
# a true $up means link up.
sub qemu_set_link_status {
    my ($vmid, $device, $up) = @_;

    my $link_state = $up ? JSON::true : JSON::false;

    mon_cmd($vmid, "set_link", name => $device, up => $link_state);
}
4433
# Add the netdev backend for $deviceid via the 'netdev_add' monitor command.
#
# The command-line style string from print_netdev_full() is split on '=' and ',' into
# options; 'vhost' is converted to a JSON boolean and 'queues' to a number, the
# remaining values are passed on as strings. Returns 1.
sub qemu_netdevadd {
    my ($vmid, $conf, $arch, $device, $deviceid) = @_;

    my $netdev = print_netdev_full($vmid, $conf, $arch, $device, $deviceid, 1);
    my %options = split(/[=,]/, $netdev);

    $options{vhost} = JSON::boolean(PVE::JSONSchema::parse_boolean($options{vhost}))
        if defined($options{vhost});

    $options{queues} = $options{queues} + 0
        if defined($options{queues});

    mon_cmd($vmid, "netdev_add", %options);
    return 1;
}
4451
# Remove the netdev backend with id $deviceid via the 'netdev_del' monitor command.
sub qemu_netdevdel {
    my ($vmid, $deviceid) = @_;

    my %args = (id => $deviceid);
    mon_cmd($vmid, "netdev_del", %args);
}
4457
# Replace the USB device $deviceid of the running VM: unplug the old one, add an xhci
# controller if a usb3 device needs one and none is present, then plug the new device.
# Does nothing when $device is false.
sub qemu_usb_hotplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    return if !$device;

    # remove the old one first
    vm_deviceunplug($vmid, $conf, $deviceid);

    # check if xhci controller is necessary and available
    if ($device->{usb3}) {
        my $present = vm_devices_list($vmid);

        if (!$present->{xhci}) {
            my $xhci_addr = print_pci_addr("xhci", undef, $arch, $machine_type);
            qemu_deviceadd($vmid, "nec-usb-xhci,id=xhci$xhci_addr");
        }
    }

    my $usb_device = parse_usb_device($device->{host});
    $usb_device->{usb3} = $device->{usb3};

    # add the new one
    vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $usb_device, $arch, $machine_type);
}
4482
# Hot-plug or hot-unplug vCPUs so that the running VM ends up with $vcpus vCPUs.
#
# A false $vcpus means "all" (sockets * cores). Dies if more vCPUs than that maximum
# are requested. When CPUs are added or removed as devices, the configuration is
# written after every single successful step, so $conf->{vcpus} follows the VM.
sub qemu_cpu_hotplug {
    my ($vmid, $conf, $vcpus) = @_;

    my $machine_type = PVE::QemuServer::Machine::get_current_qemu_machine($vmid);

    # 'smp' is the old style socket count - no longer used, 'sockets' takes precedence
    my $sockets = $conf->{sockets} || $conf->{smp} || 1;
    my $cores = $conf->{cores} || 1;
    my $maxcpus = $sockets * $cores;

    $vcpus = $maxcpus if !$vcpus;

    if ($vcpus > $maxcpus) {
        die "you can't add more vcpus than maxcpus\n";
    }

    my $currentvcpus = $conf->{vcpus} || $maxcpus;

    # Poll 'query-cpus-fast' once per second until it reports $expected CPUs; raises a
    # parameter exception with $error once the retry counter exceeds $max_retry.
    my $wait_for_cpu_count = sub {
        my ($expected, $max_retry, $error) = @_;

        my $running_cpus;
        for (my $retry = 0; ; $retry++) {
            $running_cpus = mon_cmd($vmid, "query-cpus-fast");
            last if scalar(@{$running_cpus}) == $expected;
            raise_param_exc({ vcpus => $error }) if $retry > $max_retry;
            sleep 1;
        }
        return $running_cpus;
    };

    if ($vcpus < $currentvcpus) {
        if (!PVE::QemuServer::Machine::machine_version($machine_type, 2, 7)) {
            die "cpu hot-unplugging requires qemu version 2.7 or higher\n";
        }

        for (my $i = $currentvcpus; $i > $vcpus; $i--) {
            qemu_devicedel($vmid, "cpu$i");
            my $running_cpus = $wait_for_cpu_count->($i - 1, 5, "error unplugging cpu$i");

            # update conf after each successful cpu unplug
            $conf->{vcpus} = scalar(@{$running_cpus});
            PVE::QemuConfig->write_config($vmid, $conf);
        }

        return;
    }

    my $currentrunningvcpus = mon_cmd($vmid, "query-cpus-fast");
    if (scalar(@{$currentrunningvcpus}) != $currentvcpus) {
        die "vcpus in running vm does not match its configuration\n";
    }

    if (PVE::QemuServer::Machine::machine_version($machine_type, 2, 7)) {
        for (my $i = $currentvcpus + 1; $i <= $vcpus; $i++) {
            qemu_deviceadd($vmid, print_cpu_device($conf, $i));
            my $running_cpus = $wait_for_cpu_count->($i, 10, "error hotplugging cpu$i");

            # update conf after each successful cpu hotplug
            $conf->{vcpus} = scalar(@{$running_cpus});
            PVE::QemuConfig->write_config($vmid, $conf);
        }
    } else {
        # older machine versions: fall back to the 'cpu-add' command
        for (my $i = $currentvcpus; $i < $vcpus; $i++) {
            mon_cmd($vmid, "cpu-add", id => int($i));
        }
    }
}
4557
# Apply I/O throttling limits to block device $deviceid of a running VM via the
# 'block_set_io_throttle' monitor command. Does nothing if the VM is not running.
# Every limit is passed through int() before being sent.
sub qemu_block_set_io_throttle {
    my ($vmid, $deviceid,
        $bps, $bps_rd, $bps_wr, $iops, $iops_rd, $iops_wr,
        $bps_max, $bps_rd_max, $bps_wr_max, $iops_max, $iops_rd_max, $iops_wr_max,
        $bps_max_length, $bps_rd_max_length, $bps_wr_max_length,
        $iops_max_length, $iops_rd_max_length, $iops_wr_max_length) = @_;

    return if !check_running($vmid);

    # name/value pairs in the order they are handed to the monitor
    my @limits = (
        bps => $bps,
        bps_rd => $bps_rd,
        bps_wr => $bps_wr,
        iops => $iops,
        iops_rd => $iops_rd,
        iops_wr => $iops_wr,
        bps_max => $bps_max,
        bps_rd_max => $bps_rd_max,
        bps_wr_max => $bps_wr_max,
        iops_max => $iops_max,
        iops_rd_max => $iops_rd_max,
        iops_wr_max => $iops_wr_max,
        bps_max_length => $bps_max_length,
        bps_rd_max_length => $bps_rd_max_length,
        bps_wr_max_length => $bps_wr_max_length,
        iops_max_length => $iops_max_length,
        iops_rd_max_length => $iops_rd_max_length,
        iops_wr_max_length => $iops_wr_max_length,
    );

    my @throttle_args;
    while (my ($name, $value) = splice(@limits, 0, 2)) {
        push @throttle_args, $name, int($value);
    }

    mon_cmd($vmid, "block_set_io_throttle", device => $deviceid, @throttle_args);

}
4589
# Resize volume $volid to $size on the storage layer and, if the VM is running, notify
# QEMU through 'block_resize'.
#
# If PVE::Storage::volume_resize() returns false, a size of 0 is passed to QEMU. The
# size sent to QEMU is rounded up to the next multiple of 1024.
sub qemu_block_resize {
    my ($vmid, $deviceid, $storecfg, $volid, $size) = @_;

    my $running = check_running($vmid);

    my $resized = PVE::Storage::volume_resize($storecfg, $volid, $size, $running);
    $size = 0 if !$resized;

    return if !$running;

    # round up to a multiple of 1024
    my $remainder = $size % 1024;
    $size += (1024 - $remainder) % 1024;

    my %args = (
        device => $deviceid,
        size => int($size),
        timeout => 60,
    );
    mon_cmd($vmid, "block_resize", %args);
}
4610
# Create snapshot $snap of volume $volid: through QEMU's internal snapshot command if the
# VM is running and do_snapshots_with_qemu() says so, through the storage layer otherwise.
sub qemu_volume_snapshot {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $running = check_running($vmid);
    my $use_qemu = $running && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    if (!$use_qemu) {
        PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
    } else {
        mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap);
    }
}
4622
# Delete snapshot $snap of volume $volid.
#
# The volume only counts as "running" if the VM is running AND the volume is referenced
# by a drive of the current configuration. In that case, and if
# do_snapshots_with_qemu() agrees, QEMU deletes the internal snapshot; otherwise the
# storage layer does.
sub qemu_volume_snapshot_delete {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $running = check_running($vmid);

    if ($running) {
        my $attached;
        my $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->foreach_volume($conf, sub {
            my ($ds, $drive) = @_;
            $attached = 1 if $drive->{file} eq $volid;
        });
        $running = $attached;
    }

    my $use_qemu = $running && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    if (!$use_qemu) {
        PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $running);
    } else {
        mon_cmd($vmid, 'blockdev-snapshot-delete-internal-sync', device => $deviceid, name => $snap);
    }
}
4644
# Set the migration capabilities of the running VM.
#
# Every capability reported by 'query-migrate-capabilities' is set explicitly: those
# flagged in the table below are switched on, all others off. 'dirty-bitmaps' depends on
# what 'query-proxmox-support' reports for savevm ($savevm true) or migration.
sub set_migration_caps {
    my ($vmid, $savevm) = @_;

    # errors from the support query are ignored
    my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };

    my $bitmap_prop = 'pbs-dirty-bitmap-migration';
    $bitmap_prop = 'pbs-dirty-bitmap-savevm' if $savevm;
    my $dirty_bitmaps = $qemu_support->{$bitmap_prop} ? 1 : 0;

    my $enabled_cap = {
        "auto-converge" => 1,
        "xbzrle" => 1,
        "x-rdma-pin-all" => 0,
        "zero-blocks" => 0,
        "compress" => 0,
        "dirty-bitmaps" => $dirty_bitmaps,
    };

    my $supported_capabilities = mon_cmd($vmid, "query-migrate-capabilities");

    my $cap_ref = [
        map {
            my $name = $_->{capability};
            +{
                capability => $name,
                state => $enabled_cap->{$name} ? JSON::true : JSON::false,
            };
        } @$supported_capabilities
    ];

    mon_cmd($vmid, "migrate-set-capabilities", capabilities => $cap_ref);
}
4675
# Call $func->($volid, $attr, @param) once for every volume referenced by $conf or any of
# its snapshots.
#
# $attr aggregates, over all references to that volume: cdrom, replicate, shared,
# referenced_in_config, referenced_in_snapshot->{$snapname}, size, is_vmstate,
# is_tpmstate, is_unused and drivename.
sub foreach_volid {
    my ($conf, $func, @param) = @_;

    my $volhash = {};

    my $collect = sub {
        my ($key, $drive, $snapname) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $attr = ($volhash->{$volid} //= {});

        # a volume only counts as cdrom if every reference to it is a cdrom
        $attr->{cdrom} //= 1;
        $attr->{cdrom} = 0 if !drive_is_cdrom($drive);

        # replicated if any reference asks for it (the default when unset)
        $attr->{replicate} //= 0;
        $attr->{replicate} = 1 if $drive->{replicate} // 1;

        $attr->{shared} //= 0;
        $attr->{shared} = 1 if $drive->{shared};

        $attr->{referenced_in_config} //= 0;
        if (defined($snapname)) {
            $attr->{referenced_in_snapshot}->{$snapname} = 1;
        } else {
            $attr->{referenced_in_config} = 1;
        }

        # the first true size seen wins
        my $size = $drive->{size};
        $attr->{size} //= $size if $size;

        $attr->{is_vmstate} //= 0;
        $attr->{is_vmstate} = 1 if $key eq 'vmstate';

        $attr->{is_tpmstate} //= 0;
        $attr->{is_tpmstate} = 1 if $key eq 'tpmstate0';

        $attr->{is_unused} //= 0;
        $attr->{is_unused} = 1 if $key =~ /^unused\d+$/;

        $attr->{drivename} = $key if is_valid_drivename($key);
    };

    my $include_opts = {
        extra_keys => ['vmstate'],
        include_unused => 1,
    };

    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $collect);
    for my $snapname (keys %{$conf->{snapshots}}) {
        PVE::QemuConfig->foreach_volume_full(
            $conf->{snapshots}->{$snapname}, $include_opts, $collect, $snapname);
    }

    for my $volid (keys %$volhash) {
        $func->($volid, $volhash->{$volid}, @param);
    }
}
4733
# Options listed here are copied from the pending section straight into the
# configuration by vmconfig_hotplug_pending(), without any hotplug action.
my $fast_plug_option = {
    map { $_ => 1 } qw(
        lock
        name
        onboot
        shares
        startup
        description
        protection
        vmstatestorage
        hookscript
        tags
    )
};
4746
# hotplug changes in [PENDING]
# $selection hash can be used to only apply specified options, for
# example: { cores => 1 } (only apply changed 'cores')
# $errors ref is used to return error messages
#
# Processing happens in three steps:
#   1. options listed in $fast_plug_option are moved from pending into the config
#      (regardless of $selection) and the config is written if anything changed
#   2. pending deletions are hot-unplugged/reset where possible
#   3. pending additions/changes are hot-plugged where possible
# Inside steps 2 and 3 an option is skipped (left pending, no error recorded) by dying
# with "skip\n"; any other error is stored in $errors->{$opt}. The config is written
# again at the end.
sub vmconfig_hotplug_pending {
    my ($vmid, $conf, $storecfg, $selection, $errors) = @_;

    my $defaults = load_defaults();
    my $arch = get_vm_arch($conf);
    my $machine_type = get_vm_machine($conf, undef, $arch);

    # commit values which do not have any impact on running VM first
    # Note: those options cannot raise errors, so we do not care about
    # $selection and always apply them.

    my $add_error = sub {
        my ($opt, $msg) = @_;
        $errors->{$opt} = "hotplug problem - $msg";
    };

    my $changes = 0;
    foreach my $opt (keys %{$conf->{pending}}) { # add/change
        if ($fast_plug_option->{$opt}) {
            $conf->{$opt} = $conf->{pending}->{$opt};
            delete $conf->{pending}->{$opt};
            $changes = 1;
        }
    }

    if ($changes) {
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');

    my $cgroup = PVE::QemuServer::CGroup->new($vmid);
    my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    foreach my $opt (sort keys %$pending_delete_hash) {
        next if $selection && !$selection->{$opt};
        my $force = $pending_delete_hash->{$opt}->{force};
        eval {
            if ($opt eq 'hotplug') {
                die "skip\n" if ($conf->{hotplug} =~ /memory/);
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                if ($defaults->{tablet}) {
                    # vm_deviceplug() expects a $device argument before $arch; tablet and
                    # keyboard have no device description, so pass undef in that slot
                    vm_deviceplug($storecfg, $conf, $vmid, 'tablet', undef, $arch, $machine_type);
                    vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', undef, $arch, $machine_type)
                        if $arch eq 'aarch64';
                } else {
                    vm_deviceunplug($vmid, $conf, 'tablet');
                    vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
                }
            } elsif ($opt =~ m/^usb\d+/) {
                die "skip\n";
                # since we cannot reliably hot unplug usb devices we are disabling it
                #die "skip\n" if !$hotplug_features->{usb} || $conf->{$opt} =~ m/spice/i;
                #vm_deviceunplug($vmid, $conf, $opt);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, undef);
            } elsif ($opt eq 'balloon') {
                # enable balloon device is not hotpluggable
                die "skip\n" if defined($conf->{balloon}) && $conf->{balloon} == 0;
                # here we reset the ballooning value to memory
                my $balloon = $conf->{memory} || $defaults->{memory};
                mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
            } elsif ($fast_plug_option->{$opt}) {
                # do nothing
            } elsif ($opt =~ m/^net(\d+)$/) {
                die "skip\n" if !$hotplug_features->{network};
                vm_deviceunplug($vmid, $conf, $opt);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if !$hotplug_features->{disk} || $opt =~ m/(ide|sata)(\d+)/;
                vm_deviceunplug($vmid, $conf, $opt);
                vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
            } elsif ($opt =~ m/^memory$/) {
                die "skip\n" if !$hotplug_features->{memory};
                PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt);
            } elsif ($opt eq 'cpuunits') {
                $cgroup->change_cpu_shares(undef, 1024);
            } elsif ($opt eq 'cpulimit') {
                $cgroup->change_cpu_quota(undef, undef); # reset, cgroup module can better decide values
            } else {
                die "skip\n";
            }
        };
        if (my $err = $@) {
            $add_error->($opt, $err) if $err ne "skip\n";
        } else {
            delete $conf->{$opt};
            PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        }
    }

    # Applies all pending cloud-init options at once and regenerates the cloud-init
    # config with the given drive ($key => $value) already set; runs at most once.
    my ($apply_pending_cloudinit, $apply_pending_cloudinit_done);
    $apply_pending_cloudinit = sub {
        return if $apply_pending_cloudinit_done; # once is enough
        $apply_pending_cloudinit_done = 1; # once is enough

        my ($key, $value) = @_;

        my @cloudinit_opts = keys %$confdesc_cloudinit;
        foreach my $opt (keys %{$conf->{pending}}) {
            next if !grep { $_ eq $opt } @cloudinit_opts;
            $conf->{$opt} = delete $conf->{pending}->{$opt};
        }

        my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
        foreach my $opt (sort keys %$pending_delete_hash) {
            next if !grep { $_ eq $opt } @cloudinit_opts;
            PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
            delete $conf->{$opt};
        }

        my $new_conf = { %$conf };
        $new_conf->{$key} = $value;
        PVE::QemuServer::Cloudinit::generate_cloudinitconfig($new_conf, $vmid);
    };

    foreach my $opt (keys %{$conf->{pending}}) {
        next if $selection && !$selection->{$opt};
        my $value = $conf->{pending}->{$opt};
        eval {
            if ($opt eq 'hotplug') {
                die "skip\n" if ($value =~ /memory/) || ($value !~ /memory/ && $conf->{hotplug} =~ /memory/);
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                if ($value == 1) {
                    # see above: undef fills the $device slot of vm_deviceplug()
                    vm_deviceplug($storecfg, $conf, $vmid, 'tablet', undef, $arch, $machine_type);
                    vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', undef, $arch, $machine_type)
                        if $arch eq 'aarch64';
                } elsif ($value == 0) {
                    vm_deviceunplug($vmid, $conf, 'tablet');
                    vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
                }
            } elsif ($opt =~ m/^usb\d+$/) {
                die "skip\n";
                # since we cannot reliably hot unplug usb devices we disable it for now
                #die "skip\n" if !$hotplug_features->{usb} || $value =~ m/spice/i;
                #my $d = eval { parse_property_string($usbdesc->{format}, $value) };
                #die "skip\n" if !$d;
                #qemu_usb_hotplug($storecfg, $conf, $vmid, $opt, $d, $arch, $machine_type);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, $value);
            } elsif ($opt eq 'balloon') {
                # enable/disable ballooning device is not hotpluggable
                my $old_balloon_enabled = !!(!defined($conf->{balloon}) || $conf->{balloon});
                my $new_balloon_enabled = !!(!defined($conf->{pending}->{balloon}) || $conf->{pending}->{balloon});
                die "skip\n" if $old_balloon_enabled != $new_balloon_enabled;

                # allow manual ballooning if shares is set to zero
                if ((defined($conf->{shares}) && ($conf->{shares} == 0))) {
                    my $balloon = $conf->{pending}->{balloon} || $conf->{memory} || $defaults->{memory};
                    mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
                }
            } elsif ($opt =~ m/^net(\d+)$/) {
                # some changes can be done without hotplug
                vmconfig_update_net($storecfg, $conf, $hotplug_features->{network},
                                    $vmid, $opt, $value, $arch, $machine_type);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if $opt eq 'efidisk0' || $opt eq 'tpmstate0';
                # some changes can be done without hotplug
                my $drive = parse_drive($opt, $value);
                if (drive_is_cloudinit($drive)) {
                    $apply_pending_cloudinit->($opt, $value);
                }
                vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
                                     $vmid, $opt, $value, $arch, $machine_type);
            } elsif ($opt =~ m/^memory$/) { #dimms
                die "skip\n" if !$hotplug_features->{memory};
                $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt, $value);
            } elsif ($opt eq 'cpuunits') {
                $cgroup->change_cpu_shares($conf->{pending}->{$opt}, 1024);
            } elsif ($opt eq 'cpulimit') {
                my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000);
                $cgroup->change_cpu_quota($cpulimit, 100000);
            } elsif ($opt eq 'agent') {
                vmconfig_update_agent($conf, $opt, $value);
            } else {
                die "skip\n"; # skip non-hot-pluggable options
            }
        };
        if (my $err = $@) {
            $add_error->($opt, $err) if $err ne "skip\n";
        } else {
            $conf->{$opt} = $value;
            delete $conf->{pending}->{$opt};
        }
    }

    PVE::QemuConfig->write_config($vmid, $conf);
}
4941
# Free the volume behind $drive if that is permitted.
#
# Only acts when $force is set or $key is an 'unusedN' entry, and the drive is not a
# cdrom. Returns 1 if the volume was freed or is not owned by this VM (so the entry can
# simply be dropped from the config), and an empty value if nothing was done. Dies if the
# user lacks 'Datastore.AllocateSpace' on the storage or the volume is still in use.
sub try_deallocate_drive {
    my ($storecfg, $vmid, $conf, $key, $drive, $rpcenv, $authuser, $force) = @_;

    return if !($force || $key =~ /^unused/);
    return if drive_is_cdrom($drive, 1);

    my $volid = $drive->{file};

    # If vm is not owner of this disk remove from config
    return 1 if !vm_is_volid_owner($storecfg, $vmid, $volid);

    my $sid = PVE::Storage::parse_volume_id($volid);
    $rpcenv->check($authuser, "/storage/$sid", ['Datastore.AllocateSpace']);

    # check if the disk is really unused
    if (PVE::QemuServer::Drive::is_volume_in_use($storecfg, $conf, $key, $volid)) {
        die "unable to delete '$volid' - volume is still in use (snapshot?)\n";
    }

    PVE::Storage::vdisk_free($storecfg, $volid);
    return 1;
}
4964
# Handle removal of drive option $opt from the configuration: with $force, check the
# 'VM.Config.Disk' permission and try to deallocate the volume; without it, register
# the volume as an unused drive.
sub vmconfig_delete_or_detach_drive {
    my ($vmid, $storecfg, $conf, $opt, $force) = @_;

    my $drive = parse_drive($opt, $conf->{$opt});

    my $rpcenv = PVE::RPCEnvironment::get();
    my $authuser = $rpcenv->get_user();

    # plain detach: keep the volume around as unused disk
    return vmconfig_register_unused_drive($storecfg, $vmid, $conf, $drive) if !$force;

    $rpcenv->check_vm_perm($authuser, $vmid, undef, ['VM.Config.Disk']);
    return try_deallocate_drive($storecfg, $vmid, $conf, $opt, $drive, $rpcenv, $authuser, $force);
}
4980
4981
4982
# Apply all pending changes of $conf to the configuration itself (cold plug)
# and write the result back once at the end.
#
# Parameters:
#   $vmid     - ID of the VM whose config is written
#   $conf     - parsed VM config, modified in place
#   $storecfg - storage configuration, passed on to the drive helpers
#   $errors   - optional hash reference; receives one message per option that
#               could not be applied
#
# A failure for one option is recorded and warned about, the option stays
# pending, and the remaining options are still processed. Returns early,
# without writing, when there are no pending changes.
sub vmconfig_apply_pending {
    my ($vmid, $conf, $storecfg, $errors) = @_;

    return if !scalar(keys %{$conf->{pending}});

    my $add_apply_error = sub {
        my ($opt, $msg) = @_;
        my $err_msg = "unable to apply pending change $opt : $msg";
        $errors->{$opt} = $err_msg;
        warn $err_msg;
    };

    # cold plug

    my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    foreach my $opt (sort keys %$pending_delete_hash) {
        my $force = $pending_delete_hash->{$opt}->{force};
        eval {
            if ($opt =~ m/^unused/) {
                die "internal error";
            } elsif (defined($conf->{$opt}) && is_valid_drivename($opt)) {
                vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
            }
        };
        if (my $err = $@) {
            $add_apply_error->($opt, $err);
        } else {
            PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
            delete $conf->{$opt};
        }
    }

    PVE::QemuConfig->cleanup_pending($conf);

    # Sorted like the delete loop above, so that the order in which replaced
    # drives get registered as unused does not depend on hash ordering.
    foreach my $opt (sort keys %{$conf->{pending}}) { # add/change
        next if $opt eq 'delete'; # just to be sure
        eval {
            # a drive that gets replaced is kept around as unused volume
            if (defined($conf->{$opt}) && is_valid_drivename($opt)) {
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, parse_drive($opt, $conf->{$opt}));
            }
        };
        if (my $err = $@) {
            $add_apply_error->($opt, $err);
        } else {
            $conf->{$opt} = delete $conf->{pending}->{$opt};
        }
    }

    # write all changes at once to avoid unnecessary i/o
    PVE::QemuConfig->write_config($vmid, $conf);
}
5034
# Apply a changed network device option ($opt, e.g. 'net0') to a VM.
#
# If only bridge, VLAN tag, trunks, firewall, rate or link state differ from
# the current value, the existing tap device is re-plugged or adjusted and 1
# is returned. Otherwise the device is unplugged (if present) and plugged
# again, which requires $hotplug; without it the sub dies with "skip\n".
sub vmconfig_update_net {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $newnet = parse_net($value);

    if ($conf->{$opt}) {
        my $oldnet = parse_net($conf->{$opt});

        my $needs_replug =
            safe_string_ne($oldnet->{model}, $newnet->{model})
            || safe_string_ne($oldnet->{macaddr}, $newnet->{macaddr})
            || safe_num_ne($oldnet->{queues}, $newnet->{queues})
            || !($newnet->{bridge} && $oldnet->{bridge}); # bridge/nat mode change

        if (!$needs_replug) {
            die "internal error" if $opt !~ m/net(\d+)/;
            my $iface = "tap${vmid}i$1";

            my $tap_config_changed =
                safe_string_ne($oldnet->{bridge}, $newnet->{bridge})
                || safe_num_ne($oldnet->{tag}, $newnet->{tag})
                || safe_string_ne($oldnet->{trunks}, $newnet->{trunks})
                || safe_num_ne($oldnet->{firewall}, $newnet->{firewall});

            if ($tap_config_changed) {
                PVE::Network::tap_unplug($iface);

                my @plug_args = (
                    $iface,
                    $newnet->{bridge},
                    $newnet->{tag},
                    $newnet->{firewall},
                    $newnet->{trunks},
                    $newnet->{rate},
                );
                if ($have_sdn) {
                    PVE::Network::SDN::Zones::tap_plug(@plug_args);
                } else {
                    PVE::Network::tap_plug(@plug_args);
                }
            } elsif (safe_num_ne($oldnet->{rate}, $newnet->{rate})) {
                # Rate can be applied on its own but any change above needs to
                # include the rate in tap_plug since OVS resets everything.
                PVE::Network::tap_rate_limit($iface, $newnet->{rate});
            }

            if (safe_string_ne($oldnet->{link_down}, $newnet->{link_down})) {
                qemu_set_link_status($vmid, $opt, !$newnet->{link_down});
            }

            return 1;
        }

        # for non online change, we try to hot-unplug
        die "skip\n" if !$hotplug;
        vm_deviceunplug($vmid, $conf, $opt);
    }

    die "skip\n" if !$hotplug;

    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet, $arch, $machine_type);
}
5087
# Decide whether a changed guest agent option can be applied to a running VM.
#
# Dies with "skip\n" if $opt is not currently set in $conf, or if any
# sub-option other than the hotpluggable ones (fstrim_cloned_disks) differs
# between the current and the new value. Returns nothing otherwise.
sub vmconfig_update_agent {
    my ($conf, $opt, $value) = @_;

    die "skip\n" if !$conf->{$opt};

    my $hotplug_options = { fstrim_cloned_disks => 1 };

    my $old_agent = parse_guest_agent($conf);
    my $agent = parse_guest_agent({$opt => $value});

    # first pass catches added/changed options, second pass removed ones
    for my $pair ([$agent, $old_agent], [$old_agent, $agent]) {
        my ($from, $to) = @$pair;
        for my $option (keys %$from) {
            next if defined($hotplug_options->{$option});
            die "skip\n" if safe_string_ne($from->{$option}, $to->{$option});
        }
    }

    return; # either no actual change (e.g., format string reordered) or just hotpluggable changes
}
5110
# Apply a changed drive option ($opt) to a VM.
#
# For an existing drive:
#   - changing the media type is refused
#   - CD-ROM: the medium is ejected and, if a new one is given, changed
#   - disk with the same volume: only I/O throttle settings are applied live,
#     a change of discard/iothread/queues/cache/ssd dies with "skip\n"
#   - disk with a different volume: the old one is unplugged and registered
#     as unused, then the new one is plugged (requires $hotplug)
# New drives are hotplugged, except on ide/sata buses or without $hotplug,
# which die with "skip\n".
#
# Returns 1 when an existing drive was updated in place.
sub vmconfig_update_disk {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $drive = parse_drive($opt, $value);

    if ($conf->{$opt} && (my $old_drive = parse_drive($opt, $conf->{$opt}))) {
        my $media = $drive->{media} || 'disk';
        my $oldmedia = $old_drive->{media} || 'disk';
        die "unable to change media type\n" if $media ne $oldmedia;

        if (drive_is_cdrom($old_drive)) {
            if ($drive->{file} eq 'none') {
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive)
                    if drive_is_cloudinit($old_drive);
            } else {
                my $path = get_iso_path($storecfg, $vmid, $drive->{file});

                # force eject if locked
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");

                mon_cmd($vmid, "blockdev-change-medium", id => "$opt", filename => "$path")
                    if $path;
            }

            return 1;
        }

        if ($drive->{file} eq $old_drive->{file}) {
            # update existing disk

            # skip non hotpluggable value
            for my $key (qw(discard iothread queues cache ssd)) {
                die "skip\n" if safe_string_ne($drive->{$key}, $old_drive->{$key});
            }

            # throttle settings, in the order qemu_block_set_io_throttle is called with below
            my @throttle_keys = qw(
                mbps mbps_rd mbps_wr
                iops iops_rd iops_wr
                mbps_max mbps_rd_max mbps_wr_max
                iops_max iops_rd_max iops_wr_max
                bps_max_length bps_rd_max_length bps_wr_max_length
                iops_max_length iops_rd_max_length iops_wr_max_length
            );

            my $throttle_changed = 0;
            for my $key (@throttle_keys) {
                next if !safe_num_ne($drive->{$key}, $old_drive->{$key});
                $throttle_changed = 1;
                last;
            }

            # apply throttle
            if ($throttle_changed) {
                qemu_block_set_io_throttle(
                    $vmid,
                    "drive-$opt",
                    (map { ($drive->{$_} || 0) * 1024 * 1024 } qw(mbps mbps_rd mbps_wr)),
                    (map { $drive->{$_} || 0 } qw(iops iops_rd iops_wr)),
                    (map { ($drive->{$_} || 0) * 1024 * 1024 } qw(mbps_max mbps_rd_max mbps_wr_max)),
                    (map { $drive->{$_} || 0 } qw(iops_max iops_rd_max iops_wr_max)),
                    (map { $drive->{$_} || 1 } qw(
                        bps_max_length bps_rd_max_length bps_wr_max_length
                        iops_max_length iops_rd_max_length iops_wr_max_length
                    )),
                );
            }

            return 1;
        }

        # different volume: needs an unplug followed by a plug of the new one
        die "skip\n" if !$hotplug;

        # unplug and register as unused
        vm_deviceunplug($vmid, $conf, $opt);
        vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
    }

    die "skip\n" if !$hotplug || $opt =~ m/(ide|sata)(\d+)/;

    # hotplug new disks
    PVE::Storage::activate_volumes($storecfg, [$drive->{file}]) if $drive->{file} !~ m|^/dev/.+|;
    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive, $arch, $machine_type);
}
5218
# Collect the volumes of $conf that live on non-shared storage.
#
# CD-ROM drives, tpmstate0 and drives without a volume are left out. The
# result maps each drive key to
#   [$volid, $storeid, $volname, $drive, $use_existing]
# where $use_existing is 1 for volumes listed in $replicated_volumes.
#
# called in locked context by incoming migration
sub vm_migrate_get_nbd_disks {
    my ($storecfg, $conf, $replicated_volumes) = @_;

    my %local_volumes;

    my $collect = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive) || $ds eq 'tpmstate0';

        my $volid = $drive->{file} or return;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid);

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        return if $scfg->{shared};

        # replicated disks re-use existing state via bitmap
        my $use_existing = $replicated_volumes->{$volid} ? 1 : 0;

        $local_volumes{$ds} = [$volid, $storeid, $volname, $drive, $use_existing];
    };

    PVE::QemuConfig->foreach_volume($conf, $collect);

    return \%local_volumes;
}
5245
# Allocate the target volumes for an incoming NBD storage migration.
#
# $source_volumes has the layout returned by vm_migrate_get_nbd_disks. For
# each entry the result contains
#   drivestr   - drive string to use for the target VM
#   volid      - volume ID on the target
#   replicated - set to 1 when an existing replicated volume is re-used
#
# The drive hashes inside $source_volumes are not modified; the new format
# and volume ID are set on a copy.
#
# called in locked context by incoming migration
sub vm_migrate_alloc_nbd_disks {
    my ($storecfg, $vmid, $source_volumes, $storagemap) = @_;

    my $nbd = {};
    foreach my $opt (sort keys %$source_volumes) {
        my ($volid, $storeid, $volname, $drive, $use_existing) = @{$source_volumes->{$opt}};

        if ($use_existing) {
            $nbd->{$opt}->{drivestr} = print_drive($drive);
            $nbd->{$opt}->{volid} = $volid;
            $nbd->{$opt}->{replicated} = 1;
            next;
        }

        # If a remote storage is specified and the format of the original
        # volume is not available there, fall back to the default format.
        # Otherwise use the same format as the original.
        my $format;
        if (!$storagemap->{identity}) {
            $storeid = PVE::JSONSchema::map_id($storagemap, $storeid);
            my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            my $fileFormat = qemu_img_format($scfg, $volname);
            $format = (grep {$fileFormat eq $_} @{$validFormats}) ? $fileFormat : $defFormat;
        } else {
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $format = qemu_img_format($scfg, $volname);
        }

        my $size = $drive->{size} / 1024;
        my $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $format, undef, $size);

        # shallow copy, so the caller's drive hash keeps pointing at the source volume
        my $newdrive = { %$drive, format => $format, file => $newvolid };

        $nbd->{$opt}->{drivestr} = print_drive($newdrive);
        $nbd->{$opt}->{volid} = $newvolid;
    }

    return $nbd;
}
5289
# Start a VM while holding its config lock.
#
# Refuses to start templates (unless skiptemplate is set) and VMs that are
# already running. If the config carries a 'backup' lock and the VM is
# running, the guest is resumed in the existing QEMU instance instead.
# Sets $params->{resume} when the VM has a 'suspended' lock or a vmstate.
#
# see vm_start_nolock for parameters, additionally:
# migrate_opts:
#   storagemap = parsed storage map for allocating NBD disks
sub vm_start {
    my ($storecfg, $vmid, $params, $migrate_opts) = @_;

    my $start_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid, $migrate_opts->{migratedfrom});

        if (!$params->{skiptemplate} && PVE::QemuConfig->is_template($conf)) {
            die "you can't start a vm if it's a template\n";
        }

        my $has_suspended_lock = PVE::QemuConfig->has_lock($conf, 'suspended');
        my $has_backup_lock = PVE::QemuConfig->has_lock($conf, 'backup');

        my $running = check_running($vmid, undef, $migrate_opts->{migratedfrom});

        # a backup is currently running, attempt to start the guest in the
        # existing QEMU instance
        return vm_resume($vmid) if $has_backup_lock && $running;

        if (!$params->{skiplock} && !$has_suspended_lock) {
            PVE::QemuConfig->check_lock($conf);
        }

        $params->{resume} = $has_suspended_lock || defined($conf->{vmstate});

        die "VM $vmid already running\n" if $running;

        if (my $storagemap = $migrate_opts->{storagemap}) {
            my $replicated = $migrate_opts->{replicated_volumes};
            my $disks = vm_migrate_get_nbd_disks($storecfg, $conf, $replicated);
            my $nbd = vm_migrate_alloc_nbd_disks($storecfg, $vmid, $disks, $storagemap);
            $migrate_opts->{nbd} = $nbd;

            # start with the freshly allocated target volumes
            for my $opt (keys %$nbd) {
                $conf->{$opt} = $nbd->{$opt}->{drivestr};
            }
        }

        return vm_start_nolock($storecfg, $vmid, $conf, $params, $migrate_opts);
    };

    return PVE::QemuConfig->lock_config($vmid, $start_locked);
}
5333
5334
# Start the VM $vmid with the given $conf without taking the config lock
# itself (vm_start calls this from within lock_config).
#
# Returns a hash reference which may contain migrate_uri,
# migrate_storage_uri, drives and spice_port. Dies with "start failed: ..."
# if QEMU could not be started; activated volumes and PCI reservations are
# released on a best-effort basis in that case.
#
# params:
#   statefile => 'tcp', 'unix' for migration or path/volid for RAM state
#   skiplock => 0/1, skip checking for config lock
#   skiptemplate => 0/1, skip checking whether VM is template
#   forcemachine => to force Qemu machine (rollback/migration)
#   forcecpu => a QEMU '-cpu' argument string to override get_cpu_options
#   timeout => in seconds
#   paused => start VM in paused state (backup)
#   resume => resume from hibernation
#   pbs-backing => {
#       sata0 => {
#           repository
#           snapshot
#           keyfile
#           archive
#       },
#       virtio2 => ...
#   }
# migrate_opts:
#   nbd => volumes for NBD exports (vm_migrate_alloc_nbd_disks)
#   migratedfrom => source node
#   spice_ticket => used for spice migration, passed via tunnel/stdin
#   network => CIDR of migration network
#   type => secure/insecure - tunnel over encrypted connection or plain-text
#   nbd_proto_version => int, 0 for TCP, 1 for UNIX
#   replicated_volumes => which volids should be re-used with bitmaps for nbd migration
#   tpmstate_vol => new volid of tpmstate0, not yet contained in config
sub vm_start_nolock {
    my ($storecfg, $vmid, $conf, $params, $migrate_opts) = @_;

    my $statefile = $params->{statefile};
    my $resume = $params->{resume};

    my $migratedfrom = $migrate_opts->{migratedfrom};
    my $migration_type = $migrate_opts->{type};

    my $res = {};

    # clean up leftover reboot request files
    eval { clear_reboot_request($vmid); };
    warn $@ if $@;

    if (!$statefile && scalar(keys %{$conf->{pending}})) {
        vmconfig_apply_pending($vmid, $conf, $storecfg);
        $conf = PVE::QemuConfig->load_config($vmid); # update/reload
    }

    # don't regenerate the ISO if the VM is started as part of a live migration
    # this way we can reuse the old ISO with the correct config
    PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid) if !$migratedfrom;

    # override TPM state vol if migrated, conf is out of date still
    if (my $tpmvol = $migrate_opts->{tpmstate_vol}) {
        my $parsed = parse_drive("tpmstate0", $conf->{tpmstate0});
        $parsed->{file} = $tpmvol;
        $conf->{tpmstate0} = print_drive($parsed);
    }

    my $defaults = load_defaults();

    # set environment variable useful inside network script
    $ENV{PVE_MIGRATED_FROM} = $migratedfrom if $migratedfrom;

    PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1);

    my $forcemachine = $params->{forcemachine};
    my $forcecpu = $params->{forcecpu};
    if ($resume) {
        # enforce machine and CPU type on suspended vm to ensure HW compatibility
        $forcemachine = $conf->{runningmachine};
        $forcecpu = $conf->{runningcpu};
        print "Resuming suspended VM\n";
    }

    my ($cmd, $vollist, $spice_port) = config_to_command($storecfg, $vmid,
        $conf, $defaults, $forcemachine, $forcecpu, $params->{'pbs-backing'});

    # Resolve (and cache) the local IP to listen on for migration traffic:
    # taken from the migration network if one is configured, else the
    # address PVE::Cluster::remote_node_ip reports for $nodename.
    my $migration_ip;
    my $get_migration_ip = sub {
        my ($nodename) = @_;

        return $migration_ip if defined($migration_ip);

        my $cidr = $migrate_opts->{network};

        if (!defined($cidr)) {
            my $dc_conf = PVE::Cluster::cfs_read_file('datacenter.cfg');
            $cidr = $dc_conf->{migration}->{network};
        }

        if (defined($cidr)) {
            my $ips = PVE::Network::get_local_ip_from_cidr($cidr);

            die "could not get IP: no address configured on local " .
                "node for network '$cidr'\n" if scalar(@$ips) == 0;

            die "could not get IP: multiple addresses configured on local " .
                "node for network '$cidr'\n" if scalar(@$ips) > 1;

            $migration_ip = $ips->[0];
        }

        $migration_ip = PVE::Cluster::remote_node_ip($nodename, 1)
            if !defined($migration_ip);

        return $migration_ip;
    };

    my $migrate_uri;
    if ($statefile) {
        if ($statefile eq 'tcp') {
            my $localip = "localhost";
            my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg');
            my $nodename = nodename();

            if (!defined($migration_type)) {
                if (defined($datacenterconf->{migration}->{type})) {
                    $migration_type = $datacenterconf->{migration}->{type};
                } else {
                    $migration_type = 'secure';
                }
            }

            if ($migration_type eq 'insecure') {
                $localip = $get_migration_ip->($nodename);
                $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
            }

            my $pfamily = PVE::Tools::get_host_address_family($nodename);
            my $migrate_port = PVE::Tools::next_migrate_port($pfamily);
            $migrate_uri = "tcp:${localip}:${migrate_port}";
            push @$cmd, '-incoming', $migrate_uri;
            push @$cmd, '-S';

        } elsif ($statefile eq 'unix') {
            # should be default for secure migrations as a ssh TCP forward
            # tunnel is not deterministically ready in time and regularly
            # fails to set up, so use UNIX socket forwards
            my $socket_addr = "/run/qemu-server/$vmid.migrate";
            unlink $socket_addr;

            $migrate_uri = "unix:$socket_addr";

            push @$cmd, '-incoming', $migrate_uri;
            push @$cmd, '-S';

        } elsif (-e $statefile) {
            push @$cmd, '-loadstate', $statefile;
        } else {
            my $statepath = PVE::Storage::path($storecfg, $statefile);
            push @$vollist, $statefile;
            push @$cmd, '-loadstate', $statepath;
        }
    } elsif ($params->{paused}) {
        push @$cmd, '-S';
    }

    my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume);

    my $pci_devices = {}; # host pci devices
    for (my $i = 0; $i < $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES; $i++) {
        my $dev = $conf->{"hostpci$i"} or next;
        $pci_devices->{$i} = parse_hostpci($dev);
    }

    # do not reserve pciid for mediated devices, sysfs will error out for duplicate assignment
    my $real_pci_devices = [ grep { !(defined($_->{mdev}) && scalar($_->{pciid}->@*) == 1) } values $pci_devices->%* ];

    # map to a flat list of pci ids
    my $pci_id_list = [ map { $_->{id} } map { $_->{pciid}->@* } $real_pci_devices->@* ];

    # reserve all PCI IDs before actually doing anything with them
    PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, $start_timeout);

    eval {
        for my $id (sort keys %$pci_devices) {
            my $d = $pci_devices->{$id};
            for my $dev ($d->{pciid}->@*) {
                PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $id, $d->{mdev});
            }
        }
    };
    if (my $err = $@) {
        eval { PVE::QemuServer::PCI::remove_pci_reservation($pci_id_list) };
        warn $@ if $@;
        die $err;
    }

    PVE::Storage::activate_volumes($storecfg, $vollist);

    # errors are ignored on purpose here
    eval {
        run_command(['/bin/systemctl', 'stop', "$vmid.scope"], outfunc => sub{}, errfunc => sub{});
    };
    # Issues with the above 'stop' not being fully completed are extremely rare, a very low
    # timeout should be more than enough here...
    PVE::Systemd::wait_for_unit_removed("$vmid.scope", 5);

    my $cpuunits = get_cpuunits($conf);

    my %run_params = (
        timeout => $statefile ? undef : $start_timeout,
        umask => 0077,
        noerr => 1,
    );

    # when migrating, prefix QEMU output so other side can pick up any
    # errors that might occur and show the user
    if ($migratedfrom) {
        $run_params{quiet} = 1;
        $run_params{logfunc} = sub { print "QEMU: $_[0]\n" };
    }

    my %systemd_properties = (
        Slice => 'qemu.slice',
        KillMode => 'process',
        SendSIGKILL => 0,
        TimeoutStopUSec => ULONG_MAX, # infinity
    );

    if (PVE::CGroup::cgroup_mode() == 2) {
        $cpuunits = 10000 if $cpuunits >= 10000; # else we get an error
        $systemd_properties{CPUWeight} = $cpuunits;
    } else {
        $systemd_properties{CPUShares} = $cpuunits;
    }

    if (my $cpulimit = $conf->{cpulimit}) {
        $systemd_properties{CPUQuota} = int($cpulimit * 100);
    }
    $systemd_properties{timeout} = 10 if $statefile; # setting up the scope should be quick

    my $run_qemu = sub {
        PVE::Tools::run_fork sub {
            PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);

            my $tpmpid;
            if (my $tpm = $conf->{tpmstate0}) {
                # start the TPM emulator so QEMU can connect on start
                $tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom);
            }

            my $exitcode = run_command($cmd, %run_params);
            if ($exitcode) {
                if ($tpmpid) {
                    warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n";
                    kill 'TERM', $tpmpid;
                }
                die "QEMU exited with code $exitcode\n";
            }
        };
    };

    if ($conf->{hugepages}) {

        my $code = sub {
            my $hugepages_topology = PVE::QemuServer::Memory::hugepages_topology($conf);
            my $hugepages_host_topology = PVE::QemuServer::Memory::hugepages_host_topology();

            PVE::QemuServer::Memory::hugepages_mount();
            PVE::QemuServer::Memory::hugepages_allocate($hugepages_topology, $hugepages_host_topology);

            eval { $run_qemu->() };
            if (my $err = $@) {
                PVE::QemuServer::Memory::hugepages_reset($hugepages_host_topology)
                    if !$conf->{keephugepages};
                die $err;
            }

            PVE::QemuServer::Memory::hugepages_pre_deallocate($hugepages_topology)
                if !$conf->{keephugepages};
        };
        eval { PVE::QemuServer::Memory::hugepages_update_locked($code); };

    } else {
        eval { $run_qemu->() };
    }

    if (my $err = $@) {
        # Release what was set up above if the start fails. Cleanup stays
        # best-effort, but its failures are reported instead of being dropped.
        eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
        warn $@ if $@;
        eval { PVE::QemuServer::PCI::remove_pci_reservation($pci_id_list) };
        warn $@ if $@;

        die "start failed: $err";
    }

    # re-reserve all PCI IDs now that we can know the actual VM PID
    my $pid = PVE::QemuServer::Helpers::vm_running_locally($vmid);
    eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, undef, $pid) };
    warn $@ if $@;

    print "migration listens on $migrate_uri\n" if $migrate_uri;
    $res->{migrate_uri} = $migrate_uri;

    # state loaded from a file or volume: continue the guest right away
    if ($statefile && $statefile ne 'tcp' && $statefile ne 'unix') {
        eval { mon_cmd($vmid, "cont"); };
        warn $@ if $@;
    }

    # start nbd server for storage migration
    if (my $nbd = $migrate_opts->{nbd}) {
        my $nbd_protocol_version = $migrate_opts->{nbd_proto_version} // 0;

        my $migrate_storage_uri;
        # nbd_protocol_version > 0 for unix socket support
        if ($nbd_protocol_version > 0 && $migration_type eq 'secure') {
            my $socket_path = "/run/qemu-server/$vmid\_nbd.migrate";
            mon_cmd($vmid, "nbd-server-start", addr => { type => 'unix', data => { path => $socket_path } } );
            $migrate_storage_uri = "nbd:unix:$socket_path";
        } else {
            my $nodename = nodename();
            my $localip = $get_migration_ip->($nodename);
            my $pfamily = PVE::Tools::get_host_address_family($nodename);
            my $storage_migrate_port = PVE::Tools::next_migrate_port($pfamily);

            mon_cmd($vmid, "nbd-server-start", addr => {
                type => 'inet',
                data => {
                    host => "${localip}",
                    port => "${storage_migrate_port}",
                },
            });
            $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
            $migrate_storage_uri = "nbd:${localip}:${storage_migrate_port}";
        }

        $res->{migrate_storage_uri} = $migrate_storage_uri;

        foreach my $opt (sort keys %$nbd) {
            my $drivestr = $nbd->{$opt}->{drivestr};
            my $volid = $nbd->{$opt}->{volid};
            mon_cmd($vmid, "nbd-server-add", device => "drive-$opt", writable => JSON::true );
            my $nbd_uri = "$migrate_storage_uri:exportname=drive-$opt";
            print "storage migration listens on $nbd_uri volume:$drivestr\n";
            print "re-using replicated volume: $opt - $volid\n"
                if $nbd->{$opt}->{replicated};

            $res->{drives}->{$opt} = $nbd->{$opt};
            $res->{drives}->{$opt}->{nbd_uri} = $nbd_uri;
        }
    }

    if ($migratedfrom) {
        eval {
            set_migration_caps($vmid);
        };
        warn $@ if $@;

        if ($spice_port) {
            print "spice listens on port $spice_port\n";
            $res->{spice_port} = $spice_port;
            if ($migrate_opts->{spice_ticket}) {
                mon_cmd($vmid, "set_password", protocol => 'spice', password =>
                    $migrate_opts->{spice_ticket});
                mon_cmd($vmid, "expire_password", protocol => 'spice', time => "+30");
            }
        }

    } else {
        mon_cmd($vmid, "balloon", value => $conf->{balloon}*1024*1024)
            if !$statefile && $conf->{balloon};

        foreach my $opt (keys %$conf) {
            next if $opt !~ m/^net\d+$/;
            my $nicconf = parse_net($conf->{$opt});
            qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down};
        }
    }

    mon_cmd($vmid, 'qom-set',
        path => "machine/peripheral/balloon0",
        property => "guest-stats-polling-interval",
        value => 2) if (!defined($conf->{balloon}) || $conf->{balloon});

    if ($resume) {
        print "Resumed VM, removing state\n";
        if (my $vmstate = $conf->{vmstate}) {
            PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
            PVE::Storage::vdisk_free($storecfg, $vmstate);
        }
        delete $conf->@{qw(lock vmstate runningmachine runningcpu)};
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-start');

    return $res;
}
5722
# Return the QEMU command line for VM $vmid as a single string.
#
# With $snapname the command line is built from that snapshot's config,
# honouring a machine or CPU override stored in the snapshot. Dies if the
# snapshot does not exist.
sub vm_commandline {
    my ($storecfg, $vmid, $snapname) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    my $forcemachine;
    my $forcecpu;

    if ($snapname) {
        my $snapshot = $conf->{snapshots}->{$snapname}
            // die "snapshot '$snapname' does not exist\n";

        # check for machine or CPU overrides in snapshot
        ($forcemachine, $forcecpu) = $snapshot->@{qw(runningmachine runningcpu)};

        # keep file digest for API
        $snapshot->{digest} = $conf->{digest};

        $conf = $snapshot;
    }

    my $defaults = load_defaults();

    my $cmd = config_to_command($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu);

    return PVE::Tools::cmd2string($cmd);
}
5748
# Send a 'system_reset' to VM $vmid while holding its config lock. The lock
# stored in the config is checked first unless $skiplock is set.
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $reset_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid);

        PVE::QemuConfig->check_lock($conf) unless $skiplock;

        mon_cmd($vmid, "system_reset");
    };

    PVE::QemuConfig->lock_config($vmid, $reset_locked);
}
5761
# Return an array reference with the volume IDs referenced by $conf.
# Entries starting with '/' and entries that do not parse as a volume ID are
# left out.
sub get_vm_volumes {
    my ($conf) = @_;

    my @volids;

    my $collect = sub {
        my ($volid, $attr) = @_;

        # path-like entries are not storage volumes
        return if $volid =~ m|^/|;

        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        push @volids, $volid;
    };

    foreach_volid($conf, $collect);

    return \@volids;
}
5779
# Clean up after a VM has stopped.
#
# Unless $keepActive is set, the VM's volumes are deactivated and the TPM
# state volume is unmapped. Runtime files, the ivshmem file, mediated
# devices and PCI reservations are removed, and pending config changes are
# applied if $apply_pending_changes is set.
#
# Never dies: any error aborts the remaining cleanup and is only warned about.
sub vm_stop_cleanup {
    my ($storecfg, $vmid, $conf, $keepActive, $apply_pending_changes) = @_;

    eval {
        if (!$keepActive) {
            my $vollist = get_vm_volumes($conf);
            PVE::Storage::deactivate_volumes($storecfg, $vollist);

            if (my $tpmdrive = $conf->{tpmstate0}) {
                my $tpm = parse_drive("tpmstate0", $tpmdrive);
                my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
                PVE::Storage::unmap_volume($storecfg, $tpm->{file}) if $storeid;
            }
        }

        unlink "/var/run/qemu-server/${vmid}.$_" for qw(mon qmp pid vnc qga);

        if ($conf->{ivshmem}) {
            my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
            # Just delete it for now. VMs which already have it open are not
            # affected, but new VMs will get a separate one. If this becomes
            # an issue we either add some sort of ref-counting or just add a
            # "don't delete on stop" flag to the ivshmem format.
            unlink '/dev/shm/pve-shm-' . ($ivshmem->{name} // $vmid);
        }

        my @reserved_ids;
        for my $key (keys %$conf) {
            my ($hostpciindex) = $key =~ m/^hostpci(\d+)$/ or next;

            my $d = parse_hostpci($conf->{$key});
            my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $hostpciindex);

            for my $pci (@{$d->{pciid}}) {
                my $pciid = $pci->{id};
                push @reserved_ids, $pciid;
                PVE::SysFSTools::pci_cleanup_mdev_device($pciid, $uuid);
            }
        }
        PVE::QemuServer::PCI::remove_pci_reservation(\@reserved_ids);

        vmconfig_apply_pending($vmid, $conf, $storecfg) if $apply_pending_changes;
    };
    warn $@ if $@; # avoid errors - just warn
}
5830
# Stop or shut down a running VM and clean up afterwards.
#
# With $shutdown the guest is asked to power down (through the guest agent
# if it is enabled in the config, else via 'system_powerdown'); otherwise
# QEMU is told to quit. The VM then gets $timeout seconds to disappear
# (default 60, or the 'down' value of the startup option on shutdown). If the
# command fails or the timeout is hit, the sub dies unless $force is set, in
# which case the process gets SIGTERM and, after 10 more seconds, SIGKILL.
#
# With $nocheck no config is loaded, so no lock check, hookscript or cleanup
# takes place. Returns immediately if the VM is not running.
#
# call only in locked context
sub _do_vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive) = @_;

    my $pid = check_running($vmid, $nocheck);
    return if !$pid;

    my $conf;
    if (!$nocheck) {
        $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->check_lock($conf) if !$skiplock;
        if (!defined($timeout) && $shutdown && $conf->{startup}) {
            my $opts = PVE::JSONSchema::pve_parse_startup_order($conf->{startup});
            $timeout = $opts->{down} if $opts->{down};
        }
        PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-stop');
    }

    # Poll once per second until the VM is gone or $secs polls have been
    # made; returns true if that limit was reached.
    my $wait_timed_out = sub {
        my ($secs) = @_;

        my $count = 0;
        while (($count < $secs) && check_running($vmid, $nocheck)) {
            $count++;
            sleep 1;
        }

        return $count >= $secs;
    };

    eval {
        if (!$shutdown) {
            mon_cmd($vmid, "quit");
        } elsif (defined($conf) && get_qga_key($conf, 'enabled')) {
            mon_cmd($vmid, "guest-shutdown", timeout => $timeout);
        } else {
            mon_cmd($vmid, "system_powerdown");
        }
    };
    my $err = $@;

    if ($err) {
        if (!check_running($vmid, $nocheck)) {
            warn "Unexpected: VM shutdown command failed, but VM not running anymore..\n";
            return;
        }

        die "VM quit/powerdown failed\n" if !$force;

        warn "VM quit/powerdown failed - terminating now with SIGTERM\n";
        kill 15, $pid;
    } else {
        $timeout = 60 if !defined($timeout);

        if (!$wait_timed_out->($timeout)) {
            # stopped in time
            vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
            return;
        }

        die "VM quit/powerdown failed - got timeout\n" if !$force;

        warn "VM still running - terminating now with SIGTERM\n";
        kill 15, $pid;
    }

    # wait again, this time for the SIGTERM to take effect
    if ($wait_timed_out->(10)) {
        warn "VM still running - terminating now with SIGKILL\n";
        kill 9, $pid;
        sleep 1;
    }

    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
}
5912
# Stop VM $vmid.
#
# Note: with $nocheck the VM configuration is not loaded by _do_vm_stop. We
# need that when migrating VMs to other nodes (files already moved).
# Note: we set $keepActive in vzdump stop mode - volumes need to stay active
#
# With $migratedfrom the process gets SIGTERM and the cleanup runs on the
# config loaded for that node, without taking the config lock. Otherwise the
# stop happens under the config lock; $force defaults to 1 when no $shutdown
# was requested.
sub vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive, $migratedfrom) = @_;

    $force = 1 if !defined($force) && !$shutdown;

    if (!$migratedfrom) {
        return PVE::QemuConfig->lock_config($vmid, sub {
            _do_vm_stop($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive);
        });
    }

    my $pid = check_running($vmid, $nocheck, $migratedfrom);
    kill 15, $pid if $pid;

    my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);
    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 0);

    return;
}
5933
# Reboot a running VM: record a reboot request and then shut the guest down
# while holding the config lock. If anything fails, the reboot request is
# cleared again before the error is re-thrown.
sub vm_reboot {
    my ($vmid, $timeout) = @_;

    my $shutdown_for_reboot = sub {
        # only reboot if running, as qmeventd starts it again on a stop event
        return if !check_running($vmid);

        create_reboot_request($vmid);

        my $storecfg = PVE::Storage::config();
        _do_vm_stop($storecfg, $vmid, undef, undef, $timeout, 1);
    };

    PVE::QemuConfig->lock_config($vmid, sub {
        eval { $shutdown_for_reboot->(); };
        if (my $err = $@) {
            # avoid that the next normal shutdown will be confused for a reboot
            clear_reboot_request($vmid);
            die $err;
        }
    });
}
5956
# Suspend a VM, either by pausing it or by saving its state to disk.
#
# Without $includestate the guest is only paused via the QMP 'stop' command.
# With $includestate the config gets the lock 'suspending', a vmstate volume is
# allocated on $statestorage (or on the storage returned by
# find_vmstate_storage() if none was passed), the state is written with
# 'savevm-start', and on success QEMU is told to 'quit' and the lock becomes
# 'suspended'.
#
# $skiplock: skip the config lock check
# $includestate: save the VM state to a volume instead of only pausing
# $statestorage: optional storage ID for the state volume
#
5957# note: if using the statestorage parameter, the caller has to check privileges
5958sub vm_suspend {
5959 my ($vmid, $skiplock, $includestate, $statestorage) = @_;
5960
 # declared out here because both locked sections and the unlocked save step
 # in between need them
5961 my $conf;
5962 my $path;
5963 my $storecfg;
5964 my $vmstate;
5965
5966 PVE::QemuConfig->lock_config($vmid, sub {
5967
5968 $conf = PVE::QemuConfig->load_config($vmid);
5969
 # a 'backup' lock does not block a plain pause; suspend-to-disk during a
 # backup is rejected right below
5970 my $is_backing_up = PVE::QemuConfig->has_lock($conf, 'backup');
5971 PVE::QemuConfig->check_lock($conf)
5972 if !($skiplock || $is_backing_up);
5973
5974 die "cannot suspend to disk during backup\n"
5975 if $is_backing_up && $includestate;
5976
5977 if ($includestate) {
5978 $conf->{lock} = 'suspending';
5979 my $date = strftime("%Y-%m-%d", localtime(time()));
5980 $storecfg = PVE::Storage::config();
5981 if (!$statestorage) {
5982 $statestorage = find_vmstate_storage($conf, $storecfg);
5983 # check permissions for the storage
5984 my $rpcenv = PVE::RPCEnvironment::get();
5985 if ($rpcenv->{type} ne 'cli') {
5986 my $authuser = $rpcenv->get_user();
5987 $rpcenv->check($authuser, "/storage/$statestorage", ['Datastore.AllocateSpace']);
5988 }
5989 }
5990
5991
 # allocate the state volume and persist the 'suspending' lock before the
 # config lock is released
5992 $vmstate = PVE::QemuConfig->__snapshot_save_vmstate(
5993 $vmid, $conf, "suspend-$date", $storecfg, $statestorage, 1);
5994 $path = PVE::Storage::path($storecfg, $vmstate);
5995 PVE::QemuConfig->write_config($vmid, $conf);
5996 } else {
5997 mon_cmd($vmid, "stop");
5998 }
5999 });
6000
 # the state itself is written outside of the config lock taken above
6001 if ($includestate) {
6002 # save vm state
6003 PVE::Storage::activate_volumes($storecfg, [$vmstate]);
6004
6005 eval {
6006 set_migration_caps($vmid, 1);
6007 mon_cmd($vmid, "savevm-start", statefile => $path);
 # poll once per second for as long as the save job reports 'active'
6008 for(;;) {
6009 my $state = mon_cmd($vmid, "query-savevm");
6010 if (!$state->{status}) {
6011 die "savevm not active\n";
6012 } elsif ($state->{status} eq 'active') {
6013 sleep(1);
6014 next;
6015 } elsif ($state->{status} eq 'completed') {
6016 print "State saved, quitting\n";
6017 last;
6018 } elsif ($state->{status} eq 'failed' && $state->{error}) {
6019 die "query-savevm failed with error '$state->{error}'\n"
6020 } else {
6021 die "query-savevm returned status '$state->{status}'\n";
6022 }
6023 }
6024 };
6025 my $err = $@;
6026
 # re-take the config lock and reload the config before finalizing
6027 PVE::QemuConfig->lock_config($vmid, sub {
6028 $conf = PVE::QemuConfig->load_config($vmid);
6029 if ($err) {
6030 # cleanup, but leave suspending lock, to indicate something went wrong
6031 eval {
6032 mon_cmd($vmid, "savevm-end");
6033 PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
6034 PVE::Storage::vdisk_free($storecfg, $vmstate);
6035 delete $conf->@{qw(vmstate runningmachine runningcpu)};
6036 PVE::QemuConfig->write_config($vmid, $conf);
6037 };
 # a failing cleanup is only reported; the original error is what gets thrown
6038 warn $@ if $@;
6039 die $err;
6040 }
6041
6042 die "lock changed unexpectedly\n"
6043 if !PVE::QemuConfig->has_lock($conf, 'suspending');
6044
6045 mon_cmd($vmid, "quit");
6046 $conf->{lock} = 'suspended';
6047 PVE::QemuConfig->write_config($vmid, $conf);
6048 });
6049 }
6050}
6051
# Resume a VM under the config lock.
#
# The QMP command used depends on the status reported by 'query-status':
# nothing is done for 'running', 'system_wakeup' is sent for 'suspended', and
# 'cont' otherwise. A guest in state 'shutdown' is reset first.
#
# $skiplock: skip the config lock check
# $nocheck: do not load the config at all (and hence skip the lock check)
sub vm_resume {
    my ($vmid, $skiplock, $nocheck) = @_;

    PVE::QemuConfig->lock_config($vmid, sub {
        my $status_info = mon_cmd($vmid, 'query-status');
        my $status = $status_info->{status};

        my ($resume_cmd, $reset) = ('cont', 0);
        if ($status) {
            return if $status eq 'running'; # job done, go home

            if ($status eq 'suspended') {
                $resume_cmd = 'system_wakeup';
            } elsif ($status eq 'shutdown') {
                $reset = 1;
            }
        }

        if (!$nocheck) {
            my $conf = PVE::QemuConfig->load_config($vmid);

            # a 'backup' lock must not prevent resuming
            if (!$skiplock && !PVE::QemuConfig->has_lock($conf, 'backup')) {
                PVE::QemuConfig->check_lock($conf);
            }
        }

        # required if a VM shuts down during a backup and we get a resume
        # request before the backup finishes for example
        mon_cmd($vmid, "system_reset") if $reset;

        mon_cmd($vmid, $resume_cmd);
    });
}
6082
# Send a key event to the guest while holding the config lock.
#
# Dies with the monitor's output if the 'sendkey' command printed anything.
# $skiplock is accepted but not used here.
sub vm_sendkey {
    my ($vmid, $skiplock, $key) = @_;

    my $send_key = sub {
        # the config is loaded but not used any further here
        my $conf = PVE::QemuConfig->load_config($vmid);

        # there is no qmp command, so we use the human monitor command
        my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "sendkey $key");
        die $output if $output ne '';
    };

    PVE::QemuConfig->lock_config($vmid, $send_key);
}
6095
# vzdump restore implementation
6097
# Return the name of the first member of a tar archive.
#
# Runs 'tar tf' on $archive, reads a single line of its output and terminates
# the listing process right away. Dies if the file does not exist, the listing
# cannot be started, or nothing was listed.
sub tar_archive_read_firstfile {
    my ($archive) = @_;

    if (!-f $archive) {
        die "ERROR: file '$archive' does not exist\n";
    }

    # try to detect archive type first
    my $listing;
    my $tar_pid = open($listing, '-|', 'tar', 'tf', $archive);
    die "unable to open file '$archive'\n" if !$tar_pid;

    my $first_entry = <$listing>;

    # only the first line is of interest, so stop tar early
    kill 15, $tar_pid;
    close $listing;

    if (!$first_entry) {
        die "ERROR: archive contaions no data\n";
    }
    chomp $first_entry;

    return $first_entry;
}
6115
# Remove the temporary volumes recorded in a restore stat file.
#
# $storecfg: storage configuration, used to free volumes given by volume ID
# $statfile: path of the stat file; lines of the form 'vzdump:<name>:<volid>'
#            are processed, anything else is reported as unparsable
#
# Volumes given as absolute path are unlinked, all others are freed through the
# storage layer. Failures are reported on STDERR and do not abort the cleanup.
# A stat file that cannot be opened is silently ignored.
sub tar_restore_cleanup {
    my ($storecfg, $statfile) = @_;

    print STDERR "starting cleanup\n";

    if (my $fd = IO::File->new($statfile, "r")) {
        while (defined(my $line = <$fd>)) {
            if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                my $volid = $2;
                eval {
                    if ($volid =~ m|^/|) {
                        # 'or' (not '||') so that the check applies to unlink's
                        # result rather than to its argument
                        unlink($volid) or die "unlink failed - $!\n";
                    } else {
                        PVE::Storage::vdisk_free($storecfg, $volid);
                    }
                    print STDERR "temporary volume '$volid' sucessfuly removed\n";
                };
                print STDERR "unable to cleanup '$volid' - $@" if $@;
            } else {
                print STDERR "unable to parse line in statfile - $line";
            }
        }
        $fd->close();
    }
}
6141
# Restore a VM from a backup file, dispatching on the archive format.
#
# An $archive of '-' is handed straight to the VMA restore. Otherwise the
# format is taken from $opts->{format} or, if that is not defined, from what
# PVE::Storage::archive_info() detects; 'tar' archives go to the tar restore,
# everything else to the VMA restore together with the detected compression.
sub restore_file_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if ($archive eq '-') {
        return restore_vma_archive($archive, $vmid, $user, $opts);
    }

    my $info = PVE::Storage::archive_info($archive);
    my $format = $opts->{format} // $info->{format};
    my $comp = $info->{compression};

    # try to detect archive format
    return restore_tar_archive($archive, $vmid, $user, $opts) if $format eq 'tar';

    return restore_vma_archive($archive, $vmid, $user, $opts, $comp);
}
6159
# helper to remove disks that will not be used after restore
#
# $storecfg: Storage configuration
# $vmid: ID of the VM being restored
# $oldconf: configuration the VM had before the restore
# $virtdev_hash: devices that are part of the backup, keyed by config key
#
# Errors while freeing a volume are only warned about.
my $restore_cleanup_oldconf = sub {
    my ($storecfg, $vmid, $oldconf, $virtdev_hash) = @_;

    # free a single volume, downgrading failures to warnings
    my $free_volume = sub {
        my ($volid) = @_;

        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        if (my $err = $@) {
            warn $err;
        }
    };

    PVE::QemuConfig->foreach_volume($oldconf, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;

        # leave volumes alone that are not owned by this VM
        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        # Note: only delete disk we want to restore
        # other volumes will become unused
        $free_volume->($volid) if $virtdev_hash->{$ds};
    });

    # delete vmstate files, after the restore we have no snapshots anymore
    for my $snap (values %{$oldconf->{snapshots}}) {
        $free_volume->($snap->{vmstate}) if $snap->{vmstate};
    }
};
6196
# Helper to parse vzdump backup device hints
#
# $rpcenv: Environment, used to check storage permissions
# $user: User ID, to check storage permissions ('root@pam' is not checked)
# $storecfg: Storage configuration
# $fh: the file handle for reading the configuration
# $devinfo: should contain device sizes for all backed-up devices
# $options: backup options; only the 'storage' key is read here
#
# Dies if a '#qmdump#map' line refers to a device that is missing in $devinfo.
#
# Return: $virtdev_hash, updates $devinfo (add devname, virtdev, format, storeid)
my $parse_backup_hints = sub {
    my ($rpcenv, $user, $storecfg, $fh, $devinfo, $options) = @_;

    my $virtdev_hash = {};

    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\#qmdump\#map:(\S+):(\S+):(\S*):(\S*):$/) {
            my ($virtdev, $devname, $storeid, $format) = ($1, $2, $3, $4);
            die "archive does not contain data for drive '$virtdev'\n"
                if !$devinfo->{$devname};

            # an explicitly requested storage wins over the one from the hint,
            # 'local' is the fallback in both cases
            if (defined($options->{storage})) {
                $storeid = $options->{storage} || 'local';
            } elsif (!$storeid) {
                $storeid = 'local';
            }
            $format = 'raw' if !$format;

            my $dev = $devinfo->{$devname};
            $dev->{devname} = $devname;
            $dev->{virtdev} = $virtdev;
            $dev->{format} = $format;
            $dev->{storeid} = $storeid;

            # check permission on storage
            if ($user ne 'root@pam') {
                $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace']);
            }

            $virtdev_hash->{$virtdev} = $dev;
        } elsif ($line =~ m/^((?:ide|sata|scsi)\d+):\s*(.*)\s*$/) {
            my $virtdev = $1;
            my $drive = parse_drive($virtdev, $2);

            # cloudinit drives carry no backup data; they only get an entry so
            # that a volume of the fixed cloudinit size is allocated for them
            if (drive_is_cloudinit($drive)) {
                my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
                $storeid = $options->{storage} if defined ($options->{storage});
                my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
                my $format = qemu_img_format($scfg, $volname); # has 'raw' fallback

                $virtdev_hash->{$virtdev} = {
                    format => $format,
                    storeid => $storeid,
                    size => PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE,
                    is_cloudinit => 1,
                };
            }
        }
    }

    return $virtdev_hash;
};
6257
# Helper to allocate and activate all volumes required for a restore
#
# $storecfg: Storage configuration
# $virtdev_hash: as returned by parse_backup_hints()
# $vmid: ID of the VM the volumes are allocated for
#
# Each entry of $virtdev_hash is updated in place: 'format' falls back to the
# storage's default format if the requested one is not supported there, and
# 'volid' is set to the newly allocated volume.
#
# Returns: { $virtdev => $volid }
my $restore_allocate_devices = sub {
    my ($storecfg, $virtdev_hash, $vmid) = @_;

    my $map = {};
    foreach my $virtdev (sort keys %$virtdev_hash) {
        my $d = $virtdev_hash->{$virtdev};
        # round the size up to a multiple of 1024
        my $alloc_size = int(($d->{size} + 1024 - 1)/1024);
        my $storeid = $d->{storeid};
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        # test if requested format is supported
        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        my $supported = grep { $_ eq $d->{format} } @$validFormats;
        $d->{format} = $defFormat if !$supported;

        # only cloudinit volumes get an explicit name, with the format as
        # extension if the storage configuration has a 'path'
        my $name;
        if ($d->{is_cloudinit}) {
            $name = "vm-$vmid-cloudinit";
            if ($scfg->{path}) {
                $name .= ".$d->{format}";
            }
        }

        my $volid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $vmid, $d->{format}, $name, $alloc_size);

        print STDERR "new volume ID is '$volid'\n";
        $d->{volid} = $volid;

        PVE::Storage::activate_volumes($storecfg, [$volid]);

        $map->{$virtdev} = $volid;
    }

    return $map;
};
6301
# Translate one line of a backed-up VM config into the text that goes into the
# restored config.
#
# $cookie: state shared between calls; 'netcount' numbers converted net devices
# $map: { $virtdev => $volid } of the newly allocated volumes
# $line: the config line, including its trailing newline
# $unique: if set, MAC addresses and the SMBIOS UUID are regenerated
#
# Returns the replacement text, which is the empty string for dropped lines.
sub restore_update_config_line {
    my ($cookie, $map, $line, $unique) = @_;

    # backup hints, locks, unused volumes and snapshot parents are never restored
    for my $dropped (qr/^\#qmdump\#/, qr/^\#vzdump\#/, qr/^lock:/, qr/^unused\d+:/, qr/^parent:/) {
        return '' if $line =~ $dropped;
    }

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');

    if ($line =~ m/^(vlan(\d+)):\s*(\S+)\s*$/) {
        # try to convert old 1.X settings
        my ($ind, $ethcfg) = ($2, $3);

        my $converted = '';
        foreach my $devconfig (PVE::Tools::split_list($ethcfg)) {
            my ($model, $macaddr) = split(/\=/, $devconfig);
            if (!$macaddr || $unique) {
                $macaddr = PVE::Tools::random_ether_addr($dc->{mac_prefix});
            }
            my $netstr = print_net({
                model => $model,
                bridge => "vmbr$ind",
                macaddr => $macaddr,
            });

            $converted .= "net$cookie->{netcount}: $netstr\n";
            $cookie->{netcount}++;
        }
        return $converted;
    }

    if ($unique && $line =~ m/^(net\d+):\s*(\S+)\s*$/) {
        my ($id, $netstr) = ($1, $2);
        my $net = parse_net($netstr);
        if ($net->{macaddr}) {
            $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix});
        }
        return "$id: " . print_net($net) . "\n";
    }

    if ($line =~ m/^((ide|scsi|virtio|sata|efidisk|tpmstate)\d+):\s*(\S+)\s*$/) {
        my ($virtdev, $value) = ($1, $3);
        my $di = parse_drive($virtdev, $value);

        # drives explicitly excluded from backup are kept as a comment
        return "#$line" if defined($di->{backup}) && !$di->{backup};

        # drives without a newly allocated volume are passed through unchanged
        return $line if !$map->{$virtdev};

        delete $di->{format}; # format can change on restore
        $di->{file} = $map->{$virtdev};
        return "$virtdev: " . print_drive($di) . "\n";
    }

    if ($line =~ m/^vmgenid: (.*)/) {
        my $old_vmgenid = $1;
        # always generate a new vmgenid if there was a valid one setup
        my $vmgenid = $old_vmgenid ne '0' ? generate_uuid() : $old_vmgenid;
        return "vmgenid: $vmgenid\n";
    }

    if ($unique && $line =~ m/^(smbios1: )(.*)/) {
        my ($key_prefix, $smbios_str) = ($1, $2);

        my ($uuid, $uuid_str);
        UUID::generate($uuid);
        UUID::unparse($uuid, $uuid_str);

        my $smbios1 = parse_smbios1($smbios_str);
        $smbios1->{uuid} = $uuid_str;
        return $key_prefix . print_smbios1($smbios1) . "\n";
    }

    return $line;
}
6370
# Deactivate every volume recorded (as 'volid') in $devinfo; entries without a
# volume ID are skipped.
my $restore_deactivate_volumes = sub {
    my ($storecfg, $devinfo) = @_;

    my @volids = grep { $_ } map { $devinfo->{$_}->{volid} } keys %$devinfo;

    PVE::Storage::deactivate_volumes($storecfg, \@volids);
};
6382
# Remove every volume recorded (as 'volid') in $devinfo.
#
# Volumes given as absolute path are unlinked, all others are freed through the
# storage layer. Entries without a volume ID are skipped. Failures are reported
# on STDERR and do not stop the removal of the remaining volumes.
my $restore_destroy_volumes = sub {
    my ($storecfg, $devinfo) = @_;

    foreach my $devname (keys %$devinfo) {
        my $volid = $devinfo->{$devname}->{volid};
        next if !$volid;
        eval {
            if ($volid =~ m|^/|) {
                # 'or' (not '||') so that the check applies to unlink's result
                # rather than to its argument
                unlink($volid) or die "unlink failed - $!\n";
            } else {
                PVE::Storage::vdisk_free($storecfg, $volid);
            }
            print STDERR "temporary volume '$volid' sucessfuly removed\n";
        };
        print STDERR "unable to cleanup '$volid' - $@" if $@;
    }
};
6400
# List the 'images' volumes known to the storage layer for $vmid.
#
# Returns a hash keyed by volume ID. The values are the entries returned by
# PVE::Storage::vdisk_list(), each extended by its 'path'. Entries without a
# volume ID or size are left out.
sub scan_volids {
    my ($cfg, $vmid) = @_;

    my $volumes_by_storage = PVE::Storage::vdisk_list($cfg, undef, $vmid, undef, 'images');

    my $volid_hash = {};
    for my $volumes (values %$volumes_by_storage) {
        for my $volume (@$volumes) {
            my $volid = $volume->{volid};
            next if !$volid || !$volume->{size};

            $volume->{path} = PVE::Storage::path($cfg, $volid);
            $volid_hash->{$volid} = $volume;
        }
    }

    return $volid_hash;
}
6417
# Bring the disk entries of $conf in line with the volumes in $volid_hash.
#
# Three passes: refresh the size of configured drives, drop 'unusedX' entries
# whose volume is already referenced, and add volumes that are not referenced
# at all as unused. Every change is printed.
#
# Returns a true value if $conf was modified, undef otherwise.
sub update_disk_config {
    my ($vmid, $conf, $volid_hash) = @_;

    my $changes;
    my $prefix = "VM $vmid";

    # Volumes referenced by the config, used and unused ones alike.
    # Note: it is allowed to define multiple storages with same path (alias), so
    # both the volume ID and the real path are tracked (two different volume IDs
    # can point to the same path).
    my %seen_volid;
    my %seen_path;

    # update size info
    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;
        my $volume = $volid_hash->{$volid};

        # mark volid as "in-use" for next step
        $seen_volid{$volid} = 1;
        if ($volume) {
            my $path = $volume->{path};
            $seen_path{$path} = 1 if $path;
        }

        return if drive_is_cdrom($drive) || !$volume;

        my ($updated, $msg) = PVE::QemuServer::Drive::update_disksize($drive, $volume->{size});
        return if !defined($updated);

        $changes = 1;
        $conf->{$opt} = print_drive($updated);
        print "$prefix ($opt): $msg\n";
    });

    # remove 'unusedX' entry if volume is used
    PVE::QemuConfig->foreach_unused_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $known = $volid_hash->{$volid};
        my $path = $known ? $known->{path} : undef;

        if ($seen_volid{$volid} || ($path && $seen_path{$path})) {
            print "$prefix remove entry '$opt', its volume '$volid' is in use\n";
            $changes = 1;
            delete $conf->{$opt};
        }

        $seen_volid{$volid} = 1;
        $seen_path{$path} = 1 if $path;
    });

    for my $volid (sort keys %$volid_hash) {
        next if $volid =~ m/vm-$vmid-state-/;
        next if $seen_volid{$volid};

        my $path = $volid_hash->{$volid}->{path};
        next if !$path; # just to be sure
        next if $seen_path{$path};

        $changes = 1;
        my $key = PVE::QemuConfig->add_unused_volume($conf, $volid);
        print "$prefix add unreferenced volume '$volid' as '$key' to config\n";
        $seen_path{$path} = 1; # avoid to add more than once (aliases)
    }

    return $changes;
}
6491
# Rescan the storages and update the disk entries of one VM or of all VMs.
#
# $vmid: VM to update; if undefined, every VM returned by config_list() is processed
# $nolock: update without taking the config lock
# $dryrun: compute (and print) the changes but do not write the config
sub rescan {
    my ($vmid, $nolock, $dryrun) = @_;

    my $cfg = PVE::Storage::config();

    print "rescan volumes...\n";
    my $volid_hash = scan_volids($cfg, $vmid);

    my $updatefn = sub {
        my ($vmid) = @_;

        my $conf = PVE::QemuConfig->load_config($vmid);

        PVE::QemuConfig->check_lock($conf);

        # only hand over the volumes that belong to this VM
        my $vm_volids = {
            map { ($_ => $volid_hash->{$_}) }
            grep {
                my $owner = $volid_hash->{$_}->{vmid};
                $owner && $owner == $vmid;
            } keys %$volid_hash
        };

        my $changes = update_disk_config($vmid, $conf, $vm_volids);

        PVE::QemuConfig->write_config($vmid, $conf) if $changes && !$dryrun;
    };

    # run the update for a single VM, with or without the config lock
    my $update_vm = sub {
        my ($target) = @_;

        return $updatefn->($target) if $nolock;
        return PVE::QemuConfig->lock_config($target, $updatefn, $target);
    };

    if (defined($vmid)) {
        $update_vm->($vmid);
    } else {
        my $vmlist = config_list();
        foreach my $listed_vmid (keys %$vmlist) {
            $update_vm->($listed_vmid);
        }
    }
}
6535
# Restore a VM from a Proxmox Backup Server archive.
#
# $archive: volume ID of the backup; must parse as vtype 'backup' with format 'pbs-vm'
# $vmid: ID of the VM to restore into
# $user: user ID, handed to the backup hint parser for the storage permission checks
# $options: restore options; the keys read here are 'live', 'unique' and 'pool'
#           (the hash is also handed to the backup hint parser)
#
# On error the volumes recorded in $devinfo are destroyed and the error is
# re-thrown. With $options->{live} the VM is started from the backup and the
# 'create' lock is kept until the live-restore has finished.
6536sub restore_proxmox_backup_archive {
6537 my ($archive, $vmid, $user, $options) = @_;
6538
6539 my $storecfg = PVE::Storage::config();
6540
6541 my ($storeid, $volname) = PVE::Storage::parse_volume_id($archive);
6542 my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
6543
6544 my $fingerprint = $scfg->{fingerprint};
6545 my $keyfile = PVE::Storage::PBSPlugin::pbs_encryption_key_file_name($storecfg, $storeid);
6546
6547 my $repo = PVE::PBSClient::get_repository($scfg);
6548
6549 # This is only used for `pbs-restore` and the QEMU PBS driver (live-restore)
6550 my $password = PVE::Storage::PBSPlugin::pbs_get_password($scfg, $storeid);
6551 local $ENV{PBS_PASSWORD} = $password;
6552 local $ENV{PBS_FINGERPRINT} = $fingerprint if defined($fingerprint);
6553
6554 my ($vtype, $pbs_backup_name, undef, undef, undef, undef, $format) =
6555 PVE::Storage::parse_volname($storecfg, $archive);
6556
6557 die "got unexpected vtype '$vtype'\n" if $vtype ne 'backup';
6558
6559 die "got unexpected backup format '$format'\n" if $format ne 'pbs-vm';
6560
 # scratch directory for the files fetched from the backup, removed again below
6561 my $tmpdir = "/var/tmp/vzdumptmp$$";
6562 rmtree $tmpdir;
6563 mkpath $tmpdir;
6564
6565 my $conffile = PVE::QemuConfig->config_file($vmid);
6566 # disable interrupts (always do cleanups)
6567 local $SIG{INT} =
6568 local $SIG{TERM} =
6569 local $SIG{QUIT} =
6570 local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };
6571
6572 # Note: $oldconf is undef if VM does not exists
6573 my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
6574 my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
6575 my $new_conf_raw = '';
6576
6577 my $rpcenv = PVE::RPCEnvironment::get();
 # filled with one entry per drive image found in the backup index; also
 # drives the volume cleanup after the eval
6578 my $devinfo = {};
6579
6580 eval {
6581 # enable interrupts
6582 local $SIG{INT} =
6583 local $SIG{TERM} =
6584 local $SIG{QUIT} =
6585 local $SIG{HUP} =
6586 local $SIG{PIPE} = sub { die "interrupted by signal\n"; };
6587
6588 my $cfgfn = "$tmpdir/qemu-server.conf";
6589 my $firewall_config_fn = "$tmpdir/fw.conf";
6590 my $index_fn = "$tmpdir/index.json";
6591
6592 my $cmd = "restore";
6593
 # fetch the backup index first, it lists the files contained in the backup
6594 my $param = [$pbs_backup_name, "index.json", $index_fn];
6595 PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);
6596 my $index = PVE::Tools::file_get_contents($index_fn);
6597 $index = decode_json($index);
6598
6599 # print Dumper($index);
 # record the size of every drive image listed in the index
6600 foreach my $info (@{$index->{files}}) {
6601 if ($info->{filename} =~ m/^(drive-\S+).img.fidx$/) {
6602 my $devname = $1;
6603 if ($info->{size} =~ m/^(\d+)$/) { # untaint size
6604 $devinfo->{$devname}->{size} = $1;
6605 } else {
6606 die "unable to parse file size in 'index.json' - got '$info->{size}'\n";
6607 }
6608 }
6609 }
6610
6611 my $is_qemu_server_backup = scalar(
6612 grep { $_->{filename} eq 'qemu-server.conf.blob' } @{$index->{files}}
6613 );
6614 if (!$is_qemu_server_backup) {
6615 die "backup does not look like a qemu-server backup (missing 'qemu-server.conf' file)\n";
6616 }
6617 my $has_firewall_config = scalar(grep { $_->{filename} eq 'fw.conf.blob' } @{$index->{files}});
6618
6619 $param = [$pbs_backup_name, "qemu-server.conf", $cfgfn];
6620 PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);
6621
6622 if ($has_firewall_config) {
6623 $param = [$pbs_backup_name, "fw.conf", $firewall_config_fn];
6624 PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);
6625
6626 my $pve_firewall_dir = '/etc/pve/firewall';
6627 mkdir $pve_firewall_dir; # make sure the dir exists
6628 PVE::Tools::file_copy($firewall_config_fn, "${pve_firewall_dir}/$vmid.fw");
6629 }
6630
6631 my $fh = IO::File->new($cfgfn, "r") ||
6632 die "unable to read qemu-server.conf - $!\n";
6633
6634 my $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $storecfg, $fh, $devinfo, $options);
6635
6636 # fixme: rate limit?
6637
6638 # create empty/temp config
6639 PVE::Tools::file_set_contents($conffile, "memory: 128\nlock: create");
6640
6641 $restore_cleanup_oldconf->($storecfg, $vmid, $oldconf, $virtdev_hash) if $oldconf;
6642
6643 # allocate volumes
6644 my $map = $restore_allocate_devices->($storecfg, $virtdev_hash, $vmid);
6645
 # restore the content of each allocated volume with pbs-restore
6646 foreach my $virtdev (sort keys %$virtdev_hash) {
6647 my $d = $virtdev_hash->{$virtdev};
6648 next if $d->{is_cloudinit}; # no need to restore cloudinit
6649
6650 # this fails if storage is unavailable
6651 my $volid = $d->{volid};
6652 my $path = PVE::Storage::path($storecfg, $volid);
6653
6654 # for live-restore we only want to preload the efidisk and TPM state
6655 next if $options->{live} && $virtdev ne 'efidisk0' && $virtdev ne 'tpmstate0';
6656
6657 my $pbs_restore_cmd = [
6658 '/usr/bin/pbs-restore',
6659 '--repository', $repo,
6660 $pbs_backup_name,
6661 "$d->{devname}.img.fidx",
6662 $path,
6663 '--verbose',
6664 ];
6665
6666 push @$pbs_restore_cmd, '--format', $d->{format} if $d->{format};
6667 push @$pbs_restore_cmd, '--keyfile', $keyfile if -e $keyfile;
6668
6669 if (PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $volid)) {
6670 push @$pbs_restore_cmd, '--skip-zero';
6671 }
6672
6673 my $dbg_cmdstring = PVE::Tools::cmd2string($pbs_restore_cmd);
6674 print "restore proxmox backup image: $dbg_cmdstring\n";
6675 run_command($pbs_restore_cmd);
6676 }
6677
 # read the backed-up config a second time, now to build the new config text
6678 $fh->seek(0, 0) || die "seek failed - $!\n";
6679
6680 my $cookie = { netcount => 0 };
6681 while (defined(my $line = <$fh>)) {
6682 $new_conf_raw .= restore_update_config_line(
6683 $cookie,
6684 $map,
6685 $line,
6686 $options->{unique},
6687 );
6688 }
6689
6690 $fh->close();
6691 };
6692 my $err = $@;
6693
 # for a live-restore the volumes stay active unless an error occurred
6694 if ($err || !$options->{live}) {
6695 $restore_deactivate_volumes->($storecfg, $devinfo);
6696 }
6697
6698 rmtree $tmpdir;
6699
6700 if ($err) {
6701 $restore_destroy_volumes->($storecfg, $devinfo);
6702 die $err;
6703 }
6704
6705 if ($options->{live}) {
6706 # keep lock during live-restore
6707 $new_conf_raw .= "\nlock: create";
6708 }
6709
 # this replaces the temporary config written inside the eval above
6710 PVE::Tools::file_set_contents($conffile, $new_conf_raw);
6711
6712 PVE::Cluster::cfs_update(); # make sure we read new file
6713
 # a failing rescan is only warned about
6714 eval { rescan($vmid, 1); };
6715 warn $@ if $@;
6716
6717 PVE::AccessControl::add_vm_to_pool($vmid, $options->{pool}) if $options->{pool};
6718
6719 if ($options->{live}) {
6720 # enable interrupts
6721 local $SIG{INT} =
6722 local $SIG{TERM} =
6723 local $SIG{QUIT} =
6724 local $SIG{HUP} =
6725 local $SIG{PIPE} = sub { die "got signal ($!) - abort\n"; };
6726
6727 my $conf = PVE::QemuConfig->load_config($vmid);
6728 die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf);
6729
6730 # these special drives are already restored before start
6731 delete $devinfo->{'drive-efidisk0'};
6732 delete $devinfo->{'drive-tpmstate0-backup'};
6733 pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $repo, $keyfile, $pbs_backup_name);
6734
6735 PVE::QemuConfig->remove_lock($vmid, "create");
6736 }
6737}
6738
# Start a VM whose disks are backed by a PBS snapshot and stream the data to
# the target volumes while the guest is running.
#
# $vmid, $conf: VM ID and its configuration
# $storecfg: storage configuration
# $restored_disks: hash keyed by drive name ('drive-<config key>'); only the
#                  keys are used here
# $repo: PBS repository
# $keyfile: path of the encryption key file, only passed on if the file exists
# $snap: PBS snapshot to restore from
#
# Dies if a key of $restored_disks is not of the form 'drive-<config key>'. If
# anything fails after that, the error is warned about, the VM is stopped
# again and the function dies with "live-restore failed".
sub pbs_live_restore {
    my ($vmid, $conf, $storecfg, $restored_disks, $repo, $keyfile, $snap) = @_;

    print "starting VM for live-restore\n";
    print "repository: '$repo', snapshot: '$snap'\n";

    my $pbs_backing = {};
    for my $ds (keys %$restored_disks) {
        # do not rely on $1 of an unchecked match, it could be stale
        my ($confname) = $ds =~ m/^drive-(.*)$/
            or die "unexpected drive name '$ds' - expected 'drive-<name>'\n";

        $pbs_backing->{$confname} = {
            repository => $repo,
            snapshot => $snap,
            archive => "$ds.img.fidx",
        };
        $pbs_backing->{$confname}->{keyfile} = $keyfile if -e $keyfile;

        my $drive = parse_drive($confname, $conf->{$confname});
        print "restoring '$ds' to '$drive->{file}'\n";
    }

    eval {
        # make sure HA doesn't interrupt our restore by stopping the VM
        if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
            run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
        }

        # start VM with backing chain pointing to PBS backup, environment vars for PBS driver
        # in QEMU (PBS_PASSWORD and PBS_FINGERPRINT) are already set by our caller
        vm_start_nolock($storecfg, $vmid, $conf, {paused => 1, 'pbs-backing' => $pbs_backing}, {});

        my $qmeventd_fd = register_qmeventd_handle($vmid);

        # begin streaming, i.e. data copy from PBS to target disk for every vol,
        # this will effectively collapse the backing image chain consisting of
        # [target <- alloc-track -> PBS snapshot] to just [target] (alloc-track
        # removes itself once all backing images vanish with 'auto-remove=on')
        my $jobs = {};
        for my $ds (sort keys %$restored_disks) {
            my $job_id = "restore-$ds";
            mon_cmd($vmid, 'block-stream',
                'job-id' => $job_id,
                device => "$ds",
            );
            $jobs->{$job_id} = {};
        }

        # the VM was started paused; let the guest run while the jobs stream
        mon_cmd($vmid, 'cont');
        qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');

        print "restore-drive jobs finished successfully, removing all tracking block devices"
            ." to disconnect from Proxmox Backup Server\n";

        for my $ds (sort keys %$restored_disks) {
            mon_cmd($vmid, 'blockdev-del', 'node-name' => "$ds-pbs");
        }

        close($qmeventd_fd);
    };

    my $err = $@;

    if ($err) {
        warn "An error occured during live-restore: $err\n";
        _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
        die "live-restore failed\n";
    }
}
6808
6809sub restore_vma_archive {
6810 my ($archive, $vmid, $user, $opts, $comp) = @_;
6811
6812 my $readfrom = $archive;
6813
6814 my $cfg = PVE::Storage::config();
6815 my $commands = [];
6816 my $bwlimit = $opts->{bwlimit};
6817
6818 my $dbg_cmdstring = '';
6819 my $add_pipe = sub {
6820 my ($cmd) = @_;
6821 push @$commands, $cmd;
6822 $dbg_cmdstring .= ' | ' if length($dbg_cmdstring);
6823 $dbg_cmdstring .= PVE::Tools::cmd2string($cmd);
6824 $readfrom = '-';
6825 };
6826
6827 my $input = undef;
6828 if ($archive eq '-') {
6829 $input = '<&STDIN';
6830 } else {
6831 # If we use a backup from a PVE defined storage we also consider that
6832 # storage's rate limit:
6833 my (undef, $volid) = PVE::Storage::path_to_volume_id($cfg, $archive);
6834 if (defined($volid)) {
6835 my ($sid, undef) = PVE::Storage::parse_volume_id($volid);
6836 my $readlimit = PVE::Storage::get_bandwidth_limit('restore', [$sid], $bwlimit);
6837 if ($readlimit) {
6838 print STDERR "applying read rate limit: $readlimit\n";
6839 my $cstream = ['cstream', '-t', $readlimit*1024, '--', $readfrom];
6840 $add_pipe->($cstream);
6841 }
6842 }
6843 }
6844
6845 if ($comp) {
6846 my $info = PVE::Storage::decompressor_info('vma', $comp);
6847 my $cmd = $info->{decompressor};
6848 push @$cmd, $readfrom;
6849 $add_pipe->($cmd);
6850 }
6851
6852 my $tmpdir = "/var/tmp/vzdumptmp$$";
6853 rmtree $tmpdir;
6854
6855 # disable interrupts (always do cleanups)
6856 local $SIG{INT} =
6857 local $SIG{TERM} =
6858 local $SIG{QUIT} =
6859 local $SIG{HUP} = sub { warn "got interrupt - ignored\n"; };
6860
6861 my $mapfifo = "/var/tmp/vzdumptmp$$.fifo";
6862 POSIX::mkfifo($mapfifo, 0600);
6863 my $fifofh;
6864 my $openfifo = sub { open($fifofh, '>', $mapfifo) or die $! };
6865
6866 $add_pipe->(['vma', 'extract', '-v', '-r', $mapfifo, $readfrom, $tmpdir]);
6867
6868 my $oldtimeout;
6869 my $timeout = 5;
6870
6871 my $devinfo = {};
6872
6873 my $rpcenv = PVE::RPCEnvironment::get();
6874
6875 my $conffile = PVE::QemuConfig->config_file($vmid);
6876
6877 # Note: $oldconf is undef if VM does not exist
6878 my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
6879 my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
6880 my $new_conf_raw = '';
6881
6882 my %storage_limits;
6883
6884 my $print_devmap = sub {
6885 my $cfgfn = "$tmpdir/qemu-server.conf";
6886
6887 # we can read the config - that is already extracted
6888 my $fh = IO::File->new($cfgfn, "r") ||
6889 die "unable to read qemu-server.conf - $!\n";
6890
6891 my $fwcfgfn = "$tmpdir/qemu-server.fw";
6892 if (-f $fwcfgfn) {
6893 my $pve_firewall_dir = '/etc/pve/firewall';
6894 mkdir $pve_firewall_dir; # make sure the dir exists
6895 PVE::Tools::file_copy($fwcfgfn, "${pve_firewall_dir}/$vmid.fw");
6896 }
6897
6898 my $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $cfg, $fh, $devinfo, $opts);
6899
6900 foreach my $info (values %{$virtdev_hash}) {
6901 my $storeid = $info->{storeid};
6902 next if defined($storage_limits{$storeid});
6903
6904 my $limit = PVE::Storage::get_bandwidth_limit('restore', [$storeid], $bwlimit) // 0;
6905 print STDERR "rate limit for storage $storeid: $limit KiB/s\n" if $limit;
6906 $storage_limits{$storeid} = $limit * 1024;
6907 }
6908
6909 foreach my $devname (keys %$devinfo) {
6910 die "found no device mapping information for device '$devname'\n"
6911 if !$devinfo->{$devname}->{virtdev};
6912 }
6913
6914 # create empty/temp config
6915 if ($oldconf) {
6916 PVE::Tools::file_set_contents($conffile, "memory: 128\n");
6917 $restore_cleanup_oldconf->($cfg, $vmid, $oldconf, $virtdev_hash);
6918 }
6919
6920 # allocate volumes
6921 my $map = $restore_allocate_devices->($cfg, $virtdev_hash, $vmid);
6922
6923 # print restore information to $fifofh
6924 foreach my $virtdev (sort keys %$virtdev_hash) {
6925 my $d = $virtdev_hash->{$virtdev};
6926 next if $d->{is_cloudinit}; # no need to restore cloudinit
6927
6928 my $storeid = $d->{storeid};
6929 my $volid = $d->{volid};
6930
6931 my $map_opts = '';
6932 if (my $limit = $storage_limits{$storeid}) {
6933 $map_opts .= "throttling.bps=$limit:throttling.group=$storeid:";
6934 }
6935
6936 my $write_zeros = 1;
6937 if (PVE::Storage::volume_has_feature($cfg, 'sparseinit', $volid)) {
6938 $write_zeros = 0;
6939 }
6940
6941 my $path = PVE::Storage::path($cfg, $volid);
6942
6943 print $fifofh "${map_opts}format=$d->{format}:${write_zeros}:$d->{devname}=$path\n";
6944
6945 print "map '$d->{devname}' to '$path' (write zeros = ${write_zeros})\n";
6946 }
6947
6948 $fh->seek(0, 0) || die "seek failed - $!\n";
6949
6950 my $cookie = { netcount => 0 };
6951 while (defined(my $line = <$fh>)) {
6952 $new_conf_raw .= restore_update_config_line(
6953 $cookie,
6954 $map,
6955 $line,
6956 $opts->{unique},
6957 );
6958 }
6959
6960 $fh->close();
6961 };
6962
6963 eval {
6964 # enable interrupts
6965 local $SIG{INT} =
6966 local $SIG{TERM} =
6967 local $SIG{QUIT} =
6968 local $SIG{HUP} =
6969 local $SIG{PIPE} = sub { die "interrupted by signal\n"; };
6970 local $SIG{ALRM} = sub { die "got timeout\n"; };
6971
6972 $oldtimeout = alarm($timeout);
6973
6974 my $parser = sub {
6975 my $line = shift;
6976
6977 print "$line\n";
6978
6979 if ($line =~ m/^DEV:\sdev_id=(\d+)\ssize:\s(\d+)\sdevname:\s(\S+)$/) {
6980 my ($dev_id, $size, $devname) = ($1, $2, $3);
6981 $devinfo->{$devname} = { size => $size, dev_id => $dev_id };
6982 } elsif ($line =~ m/^CTIME: /) {
6983 # we correctly received the vma config, so we can disable
6984 # the timeout now for disk allocation (set to 10 minutes, so
6985 # that we always timeout if something goes wrong)
6986 alarm(600);
6987 &$print_devmap();
6988 print $fifofh "done\n";
6989 my $tmp = $oldtimeout || 0;
6990 $oldtimeout = undef;
6991 alarm($tmp);
6992 close($fifofh);
6993 $fifofh = undef;
6994 }
6995 };
6996
6997 print "restore vma archive: $dbg_cmdstring\n";
6998 run_command($commands, input => $input, outfunc => $parser, afterfork => $openfifo);
6999 };
7000 my $err = $@;
7001
7002 alarm($oldtimeout) if $oldtimeout;
7003
7004 $restore_deactivate_volumes->($cfg, $devinfo);
7005
7006 close($fifofh) if $fifofh;
7007 unlink $mapfifo;
7008 rmtree $tmpdir;
7009
7010 if ($err) {
7011 $restore_destroy_volumes->($cfg, $devinfo);
7012 die $err;
7013 }
7014
7015 PVE::Tools::file_set_contents($conffile, $new_conf_raw);
7016
7017 PVE::Cluster::cfs_update(); # make sure we read new file
7018
7019 eval { rescan($vmid, 1); };
7020 warn $@ if $@;
7021
7022 PVE::AccessControl::add_vm_to_pool($vmid, $opts->{pool}) if $opts->{pool};
7023}
7024
# Restore a VM from a tar-based vzdump archive by piping it through the
# 'qmextract' helper and then rewriting the extracted configuration.
#
# Parameters:
#   $archive - path of the archive, or '-' to read the archive from STDIN
#   $vmid    - ID of the VM to restore; exported to the helper as VZDUMP_VMID
#   $user    - exported to the helper as VZDUMP_USER
#   $opts    - hash reference; the keys read here are 'storage', 'pool',
#              'prealloc', 'info' and 'unique'
#
# Dies if the first file of the archive is not 'qemu-server.conf', if the
# extraction command fails or if the extracted configuration cannot be opened.
# On failure tar_restore_cleanup() is called with the stat file (unless 'info'
# is set) and the temporary directory is removed before the error is re-thrown.
sub restore_tar_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if ($archive ne '-') {
        my $firstfile = tar_archive_read_firstfile($archive);
        die "ERROR: file '$archive' does not look like a QemuServer vzdump backup\n"
            if $firstfile ne 'qemu-server.conf';
    }

    my $storecfg = PVE::Storage::config();

    # avoid zombie disks when restoring over an existing VM -> cleanup first
    # pass keep_empty_config=1 to keep the config (thus VMID) reserved for us
    # skiplock=1 because qmrestore has set the 'create' lock itself already
    my $vmcfgfn = PVE::QemuConfig->config_file($vmid);
    destroy_vm($storecfg, $vmid, 1, { lock => 'restore' }) if -f $vmcfgfn;

    my $tocmd = "/usr/lib/qemu-server/qmextract";

    $tocmd .= " --storage " . PVE::Tools::shellquote($opts->{storage}) if $opts->{storage};
    $tocmd .= " --pool " . PVE::Tools::shellquote($opts->{pool}) if $opts->{pool};
    $tocmd .= ' --prealloc' if $opts->{prealloc};
    $tocmd .= ' --info' if $opts->{info};

    # tar option "xf" does not autodetect compression when read from STDIN,
    # so we pipe to zcat
    my $cmd = "zcat -f|tar xf " . PVE::Tools::shellquote($archive) . " " .
        PVE::Tools::shellquote("--to-command=$tocmd");

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    mkpath $tmpdir;

    local $ENV{VZDUMP_TMPDIR} = $tmpdir;
    local $ENV{VZDUMP_VMID} = $vmid;
    local $ENV{VZDUMP_USER} = $user;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    my $new_conf_raw = '';

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        if ($archive eq '-') {
            print "extracting archive from STDIN\n";
            run_command($cmd, input => "<&STDIN");
        } else {
            print "extracting archive '$archive'\n";
            run_command($cmd);
        }

        # note: this only leaves the eval block, not the sub
        return if $opts->{info};

        # read new mapping
        my $map = {};
        my $statfile = "$tmpdir/qmrestore.stat";
        if (my $fd = IO::File->new($statfile, "r")) {
            while (defined (my $line = <$fd>)) {
                if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                    $map->{$1} = $2 if $1;
                } else {
                    print STDERR "unable to parse line in statfile - $line\n";
                }
            }
            $fd->close();
        }

        my $confsrc = "$tmpdir/qemu-server.conf";

        my $srcfd = IO::File->new($confsrc, "r") || die "unable to open file '$confsrc'\n";

        my $cookie = { netcount => 0 };
        while (defined (my $line = <$srcfd>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $srcfd->close();
    };
    if (my $err = $@) {
        tar_restore_cleanup($storecfg, "$tmpdir/qmrestore.stat") if !$opts->{info};
        # the stat file is only needed by the cleanup call above, so the
        # temporary directory must not be left behind on failure
        rmtree $tmpdir;
        die $err;
    }

    rmtree $tmpdir;

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;
}
7132
# Call $func once, with the storage ID as only argument, for every distinct
# storage referenced by a non-CD-ROM volume of $conf. Storage IDs are visited
# in sorted order.
sub foreach_storage_used_by_vm {
    my ($conf, $func) = @_;

    my %seen;

    my $collect = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);

        # volumes for which no storage ID can be parsed are ignored
        my ($storeid) = PVE::Storage::parse_volume_id($drive->{file}, 1);
        $seen{$storeid} = $storeid if $storeid;
    };
    PVE::QemuConfig->foreach_volume($conf, $collect);

    for my $storeid (sort keys %seen) {
        $func->($storeid);
    }
}
7152
# storage types for which do_snapshots_with_qemu() answers "yes", unless the
# storage has 'krbd' set
my $qemu_snap_storage = {
    rbd => 1,
};

# Decide whether snapshots of $volid are handled by QEMU.
#
# Returns 1 for volumes on a storage type listed in $qemu_snap_storage (when
# 'krbd' is not set) and for volume IDs ending in '.qcow2' or '.qed'. Returns
# nothing otherwise, and always for a device ID matching 'tpmstate0'.
# Dies if the storage of $volid is not present in $storecfg.
sub do_snapshots_with_qemu {
    my ($storecfg, $volid, $deviceid) = @_;

    return if $deviceid =~ m/tpmstate0/;

    my $storage_name = PVE::Storage::parse_volume_id($volid);
    my $scfg = $storecfg->{ids}->{$storage_name};
    die "could not find storage '$storage_name'\n" if !defined($scfg);

    my $qemu_handled_storage = $qemu_snap_storage->{$scfg->{type}} && !$scfg->{krbd};
    return 1 if $qemu_handled_storage;

    return 1 if $volid =~ m/\.(qcow2|qed)$/;

    return;
}
7175
# Probe the guest agent of VM $vmid with a 'guest-ping' (3 second timeout).
#
# Returns 1 if the ping succeeded, 0 otherwise. On failure a warning is
# emitted unless $nowarn is true.
sub qga_check_running {
    my ($vmid, $nowarn) = @_;

    eval { mon_cmd($vmid, "guest-ping", timeout => 3); };
    my $err = $@;

    return 1 if !$err;

    warn "Qemu Guest Agent is not running - $err" if !$nowarn;
    return 0;
}
7186
# Convert the volumes of VM $vmid into base volumes.
#
# CD-ROM drives and volumes without the 'template' storage feature are left
# alone. If $disk is given, only the drive with that key is converted. The
# configuration is written after every converted volume.
sub template_create {
    my ($vmid, $conf, $disk) = @_;

    my $storecfg = PVE::Storage::config();

    my $convert_volume = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);
        return if $disk && $ds ne $disk;

        my $source_volid = $drive->{file};
        return if !PVE::Storage::volume_has_feature($storecfg, 'template', $source_volid);

        my $base_volid = PVE::Storage::vdisk_create_base($storecfg, $source_volid);
        $drive->{file} = $base_volid;
        $conf->{$ds} = print_drive($drive);
        PVE::QemuConfig->write_config($vmid, $conf);
    };

    PVE::QemuConfig->foreach_volume($conf, $convert_volume);
}
7207
# Translate an 'iscsi://<portal>/<target>/<lun>' path into the comma separated
# option string built below (iscsi file driver over tcp, raw format).
#
# Dies if $path does not have the expected form.
sub convert_iscsi_path {
    my ($path) = @_;

    my ($portal, $target, $lun) = $path =~ m|^iscsi://([^/]+)/([^/]+)/(.+)$|
        or die "cannot convert iscsi path '$path', unkown format\n";

    my $initiator_name = get_initiator_name();

    return join(',',
        'file.driver=iscsi',
        'file.transport=tcp',
        "file.initiator-name=$initiator_name",
        "file.portal=$portal",
        "file.target=$target",
        "file.lun=$lun",
        'driver=raw',
    );
}
7224
# Copy the content of $src_volid into the already allocated volume $dst_volid
# using 'qemu-img convert'.
#
# Parameters:
#   $src_volid           - source volume ID, or the path of an existing file
#   $dst_volid           - destination volume ID (must parse as a volume ID)
#   $size                - total size, only used for the progress output
#   $snapname            - optional snapshot of the source to read from
#   $is_zero_initialized - if true (and the target is not iSCSI), the target
#                          path is prefixed with 'zeroinit:'
#
# Dies if source or destination cannot be resolved, or with
# "copy failed: ..." if the command fails.
sub qemu_img_convert {
    my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized) = @_;

    my $storecfg = PVE::Storage::config();
    my ($src_storeid, $src_volname) = PVE::Storage::parse_volume_id($src_volid, 1);
    my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid, 1);

    die "destination '$dst_volid' is not a valid volid form qemu-img convert\n" if !$dst_storeid;

    my ($cachemode, $src_path, $src_format);
    my $src_is_iscsi = 0;

    if ($src_storeid) {
        # source is a storage volume
        PVE::Storage::activate_volumes($storecfg, [$src_volid], $snapname);
        my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
        $src_format = qemu_img_format($src_scfg, $src_volname);
        $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
        $src_is_iscsi = ($src_path =~ m|^iscsi://|);
        $cachemode = 'none' if $src_scfg->{type} eq 'zfspool';
    } elsif (-f $src_volid) {
        # source is a plain file; take the format from its extension, if known
        $src_path = $src_volid;
        if ($src_path =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/) {
            $src_format = $1;
        }
    }

    die "source '$src_volid' is not a valid volid nor path for qemu-img convert\n" if !$src_path;

    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
    my $dst_format = qemu_img_format($dst_scfg, $dst_volname);
    my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
    my $dst_is_iscsi = ($dst_path =~ m|^iscsi://|);

    my @cmd = ('/usr/bin/qemu-img', 'convert', '-p', '-n');

    if ($snapname && $src_format && $src_format eq "qcow2") {
        push @cmd, '-l', "snapshot.name=$snapname";
    }
    if ($dst_scfg->{type} eq 'zfspool') {
        push @cmd, '-t', 'none';
    }
    if (defined($cachemode)) {
        push @cmd, '-T', $cachemode;
    }

    if ($src_is_iscsi) {
        push @cmd, '--image-opts';
        $src_path = convert_iscsi_path($src_path);
    } elsif ($src_format) {
        push @cmd, '-f', $src_format;
    }

    if ($dst_is_iscsi) {
        push @cmd, '--target-image-opts';
        $dst_path = convert_iscsi_path($dst_path);
    } else {
        push @cmd, '-O', $dst_format;
    }

    my $target = (!$dst_is_iscsi && $is_zero_initialized) ? "zeroinit:$dst_path" : $dst_path;
    push @cmd, $src_path, $target;

    # turn the "(NN.NN/100%)" progress lines into a transferred/total summary
    my $progress_printer = sub {
        my ($line) = @_;

        return if $line !~ m/\((\S+)\/100\%\)/;

        my $percent = $1;
        my $transferred = int($size * $percent / 100);
        my $total_h = render_bytes($size, 1);
        my $transferred_h = render_bytes($transferred, 1);

        print "transferred $transferred_h of $total_h ($percent%)\n";
    };

    eval { run_command(\@cmd, timeout => undef, outfunc => $progress_printer); };
    my $err = $@;
    die "copy failed: $err" if $err;
}
7306
# Determine the image format of $volname on the storage described by $scfg.
#
# For storages with a 'path' the extension of the volume name is returned if
# it matches $PVE::QemuServer::Drive::QEMU_FORMAT_RE; in every other case the
# result is "raw".
sub qemu_img_format {
    my ($scfg, $volname) = @_;

    return "raw" if !$scfg->{path};

    return $volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/ ? $1 : "raw";
}
7316
# Start a QMP 'drive-mirror' job for device "drive-$drive" of VM $vmid and
# then hand over to qemu_drive_mirror_monitor().
#
# Parameters:
#   $dst_volid           - target volume ID, or an 'nbd:' URI used verbatim
#   $vmiddst, $completion, $qga - passed on to qemu_drive_mirror_monitor()
#   $is_zero_initialized - prefix the target path with 'zeroinit:'
#   $jobs                - hash reference of tracked jobs (created if missing)
#   $bwlimit             - optional limit, multiplied by 1024 for 'speed'
#   $src_bitmap          - optional dirty bitmap; switches to incremental sync
#
# If starting the job fails, all tracked jobs are cancelled and the sub dies
# with "mirroring error: ...".
sub qemu_drive_mirror {
    my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit, $src_bitmap) = @_;

    $jobs = {} if !$jobs;

    my $device = "drive-$drive";
    $jobs->{$device} = {};

    my ($qemu_target, $format);

    if ($dst_volid =~ /^nbd:/) {
        ($qemu_target, $format) = ($dst_volid, "nbd");
    } else {
        my $storecfg = PVE::Storage::config();
        my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid);

        my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

        $format = qemu_img_format($dst_scfg, $dst_volname);

        my $dst_path = PVE::Storage::path($storecfg, $dst_volid);

        $qemu_target = $dst_path;
        $qemu_target = "zeroinit:$dst_path" if $is_zero_initialized;
    }

    my %mirror_opts = (
        timeout => 10,
        device => $device,
        mode => "existing",
        sync => "full",
        target => $qemu_target,
    );
    $mirror_opts{format} = $format if $format;

    if (defined($src_bitmap)) {
        $mirror_opts{sync} = 'incremental';
        $mirror_opts{bitmap} = $src_bitmap;
        print "drive mirror re-using dirty bitmap '$src_bitmap'\n";
    }

    if (defined($bwlimit)) {
        $mirror_opts{speed} = $bwlimit * 1024;
        print "drive mirror is starting for drive-$drive with bandwidth limit: ${bwlimit} KB/s\n";
    } else {
        print "drive mirror is starting for drive-$drive\n";
    }

    # if a job already runs for this device we get an error, catch it for cleanup
    eval { mon_cmd($vmid, "drive-mirror", %mirror_opts); };
    if (my $err = $@) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@;
        die "mirroring error: $err\n";
    }

    qemu_drive_mirror_monitor($vmid, $vmiddst, $jobs, $completion, $qga);
}
7368
# Poll the block jobs listed in $jobs (keyed by device ID) once per second
# until they are finished or ready, then finish them according to $completion.
#
# Parameters:
#   $vmid       - VM whose monitor is queried
#   $vmiddst    - target VM; if set and different from $vmid, ready jobs are
#                 cancelled (disk not switched) while the guest is frozen via
#                 the agent or suspended
#   $jobs       - hash reference of tracked jobs; finished jobs are deleted
#   $completion - see below, defaults to 'complete'
#   $qga        - if true, try the guest agent for fs-freeze before falling
#                 back to suspending the VM
#   $op         - block job type to watch, defaults to "mirror"
#
# $completion can be either
# 'complete': wait until all jobs are ready, block-job-complete them (default)
# 'cancel': wait until all jobs are ready, block-job-cancel them
# 'skip': wait until all jobs are ready, return with block jobs in ready state
# 'auto': wait until all jobs disappear, only use for jobs which complete automatically
#
# On any error the remaining jobs are cancelled and the sub dies with
# "block job ($op) error: ...".
sub qemu_drive_mirror_monitor {
    my ($vmid, $vmiddst, $jobs, $completion, $qga, $op) = @_;

    $completion //= 'complete';
    $op //= "mirror";

    eval {
        # counts failed completion attempts; used as timeout below
        my $err_complete = 0;

        my $starttime = time ();
        while (1) {
            die "block job ('$op') timed out\n" if $err_complete > 300;

            my $stats = mon_cmd($vmid, "query-block-jobs");
            my $ctime = time();

            my $running_jobs = {};
            for my $stat (@$stats) {
                next if $stat->{type} ne $op;
                $running_jobs->{$stat->{device}} = $stat;
            }

            my $readycounter = 0;

            for my $job_id (sort keys %$jobs) {
                my $job = $running_jobs->{$job_id};

                my $vanished = !defined($job);
                my $complete = defined($jobs->{$job_id}->{complete}) && $vanished;
                if ($complete || ($vanished && $completion eq 'auto')) {
                    print "$job_id: $op-job finished\n";
                    delete $jobs->{$job_id};
                    next;
                }

                die "$job_id: '$op' has been cancelled\n" if !defined($job);

                my $busy = $job->{busy};
                my $ready = $job->{ready};
                if (my $total = $job->{len}) {
                    my $transferred = $job->{offset} || 0;
                    my $remaining = $total - $transferred;
                    my $percent = sprintf "%.2f", ($transferred * 100 / $total);

                    my $duration = $ctime - $starttime;
                    my $total_h = render_bytes($total, 1);
                    my $transferred_h = render_bytes($transferred, 1);

                    my $status = sprintf(
                        "transferred $transferred_h of $total_h ($percent%%) in %s",
                        render_duration($duration),
                    );

                    if ($ready) {
                        if ($busy) {
                            $status .= ", still busy"; # shouldn't even happen? but mirror is weird
                        } else {
                            $status .= ", ready";
                        }
                    }
                    # only print progress until the job has been seen as ready once
                    print "$job_id: $status\n" if !$jobs->{$job_id}->{ready};
                    $jobs->{$job_id}->{ready} = $ready;
                }

                $readycounter++ if $job->{ready};
            }

            last if scalar(keys %$jobs) == 0;

            if ($readycounter == scalar(keys %$jobs)) {
                print "all '$op' jobs are ready\n";

                # do the complete later (or has already been done)
                last if $completion eq 'skip' || $completion eq 'auto';

                if ($vmiddst && $vmiddst != $vmid) {
                    my $agent_running = $qga && qga_check_running($vmid);
                    if ($agent_running) {
                        print "freeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-freeze"); };
                        warn $@ if $@;
                    } else {
                        print "suspend vm\n";
                        eval { PVE::QemuServer::vm_suspend($vmid, 1); };
                        warn $@ if $@;
                    }

                    # if we clone a disk for a new target vm, we don't switch the disk
                    PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs);

                    if ($agent_running) {
                        print "unfreeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-thaw"); };
                        warn $@ if $@;
                    } else {
                        print "resume vm\n";
                        eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
                        warn $@ if $@;
                    }

                    last;
                } else {

                    for my $job_id (sort keys %$jobs) {
                        # try to switch the disk if source and destination are on the same guest
                        print "$job_id: Completing block job_id...\n";

                        # distinct name, so the job type in $op is not shadowed
                        my $completion_command;
                        if ($completion eq 'complete') {
                            $completion_command = 'block-job-complete';
                        } elsif ($completion eq 'cancel') {
                            $completion_command = 'block-job-cancel';
                        } else {
                            die "invalid completion value: $completion\n";
                        }
                        eval { mon_cmd($vmid, $completion_command, device => $job_id) };
                        my $err = $@;
                        if ($err && $err =~ m/cannot be completed/) {
                            print "$job_id: block job cannot be completed, trying again.\n";
                            $err_complete++;
                        } elsif ($err) {
                            # any other failure must not be reported as success
                            die "$job_id: block job cannot be completed - $err\n";
                        } else {
                            print "$job_id: Completed successfully.\n";
                            $jobs->{$job_id}->{complete} = 1;
                        }
                    }
                }
            }
            sleep 1;
        }
    };
    my $err = $@;

    if ($err) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        die "block job ($op) error: $err";
    }
}
7510
# Send 'block-job-cancel' for every job in $jobs (keyed by device ID) and then
# poll 'query-block-jobs' once per second until none of them is listed any
# more. Jobs are deleted from $jobs as they disappear.
#
# Errors of the cancel command itself are ignored; errors of the status query
# propagate to the caller.
sub qemu_blockjobs_cancel {
    my ($vmid, $jobs) = @_;

    for my $job_id (keys %$jobs) {
        print "$job_id: Cancelling block job\n";
        eval { mon_cmd($vmid, "block-job-cancel", device => $job_id); };
        $jobs->{$job_id}->{cancel} = 1;
    }

    while (1) {
        my $stats = mon_cmd($vmid, "query-block-jobs");

        my %still_running = map { ($_->{device}, $_) } @$stats;

        for my $job_id (keys %$jobs) {
            next if !defined($jobs->{$job_id}->{cancel});
            next if defined($still_running{$job_id});

            print "$job_id: Done.\n";
            delete $jobs->{$job_id};
        }

        last if !%$jobs;

        sleep 1;
    }
}
7541
# Clone one drive of VM $vmid for VM $newvmid and return the drive hash of the
# new volume.
#
# Parameters:
#   $storecfg    - storage configuration
#   $running     - true if the source VM is running (selects drive-mirror)
#   $drivename   - config key of the drive, e.g. 'efidisk0' or 'tpmstate0'
#   $drive       - parsed drive hash of the source
#   $snapname    - optional snapshot to clone from
#   $storage     - optional target storage (defaults to the source storage)
#   $format      - optional target format
#   $full        - full clone if true, linked clone otherwise
#   $newvollist  - array reference; every allocated volume ID is pushed onto it
#   $jobs, $completion, $qga, $bwlimit - passed on to the drive-mirror helpers
#   $conf        - VM config, used to determine the EFI vars size
#
# Returns a deep copy of $drive with 'file' set to the new volume, 'format'
# removed and 'size' set if it could be determined.
sub clone_disk {
    my ($storecfg, $vmid, $running, $drivename, $drive, $snapname,
        $newvmid, $storage, $format, $full, $newvollist, $jobs, $completion, $qga, $bwlimit, $conf) = @_;

    my $newvolid;

    if (!$full) {
        print "create linked clone of drive $drivename ($drive->{file})\n";
        $newvolid = PVE::Storage::vdisk_clone($storecfg, $drive->{file}, $newvmid, $snapname);
        push @$newvollist, $newvolid;
    } else {

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        $storeid = $storage if $storage;

        my $dst_format = resolve_dst_disk_format($storecfg, $storeid, $volname, $format);

        print "create full clone of drive $drivename ($drive->{file})\n";
        my $name = undef;
        my $size = undef;
        if (drive_is_cloudinit($drive)) {
            $name = "vm-$newvmid-cloudinit";
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            if ($scfg->{path}) {
                $name .= ".$dst_format";
            }
            $snapname = undef;
            $size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE;
        } elsif ($drivename eq 'efidisk0') {
            $size = get_efivars_size($conf);
        } elsif ($drivename eq 'tpmstate0') {
            $size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
        } else {
            ($size) = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
        }
        $newvolid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $newvmid, $dst_format, $name, ($size/1024)
        );
        push @$newvollist, $newvolid;

        PVE::Storage::activate_volumes($storecfg, [$newvolid]);

        if (drive_is_cloudinit($drive)) {
            # no data is copied for cloudinit drives

            # when cloning multiple disks (e.g. during clone_vm) it might be the last disk
            # if this is the case, we have to complete any block-jobs still there from
            # previous drive-mirrors
            if (defined($completion) && $completion eq 'complete' && scalar(keys %$jobs) > 0) {
                qemu_drive_mirror_monitor($vmid, $newvmid, $jobs, $completion, $qga);
            }
        } else {
            my $sparseinit = PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $newvolid);
            if (!$running || $snapname) {
                # TODO: handle bwlimits
                if ($drivename eq 'efidisk0') {
                    # the relevant data on the efidisk may be smaller than the source
                    # e.g. on RBD/ZFS, so we use dd to copy only the amount
                    # that is given by the OVMF_VARS.fd
                    my $src_path = PVE::Storage::path($storecfg, $drive->{file});
                    my $dst_path = PVE::Storage::path($storecfg, $newvolid);

                    # better for Ceph if block size is not too small, see bug #3324
                    my $bs = 1024*1024;

                    run_command(['qemu-img', 'dd', '-n', '-O', $dst_format, "bs=$bs", "osize=$size",
                        "if=$src_path", "of=$dst_path"]);
                } else {
                    qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit);
                }
            } else {

                die "cannot move TPM state while VM is running\n" if $drivename eq 'tpmstate0';

                my $kvmver = get_running_qemu_version($vmid);
                if (!min_version($kvmver, 2, 7)) {
                    die "drive-mirror with iothread requires qemu version 2.7 or higher\n"
                        if $drive->{iothread};
                }

                qemu_drive_mirror($vmid, $drivename, $newvolid, $newvmid, $sparseinit, $jobs,
                    $completion, $qga, $bwlimit);
            }
        }
    }

    # best effort: the size stays unset in the result if it cannot be queried
    my ($new_size) = eval { PVE::Storage::volume_size_info($storecfg, $newvolid, 10) };

    my $disk = dclone($drive);
    delete $disk->{format};
    $disk->{file} = $newvolid;
    $disk->{size} = $new_size if defined($new_size);

    return $disk;
}
7637
# Ask the running VM $vmid for its QEMU version via 'query-version' and return
# it as "<major>.<minor>".
sub get_running_qemu_version {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, "query-version");
    my $qemu = $res->{qemu};

    return join('.', $qemu->{major}, $qemu->{minor});
}
7643
# Decide whether the old (non-EFI) PXE bios files have to be used for
# $machine_type.
#
# Returns nothing for an empty machine type, otherwise the list
# ($use_old_bios_files, $machine_type). A machine type ending in '.pxe' always
# selects the old files and is returned with that suffix stripped.
sub qemu_use_old_bios_files {
    my ($machine_type) = @_;

    return if !$machine_type;

    if ($machine_type =~ m/^(\S+)\.pxe$/) {
        my $base_type = $1;
        return (1, $base_type);
    }

    my $version = extract_version($machine_type, kvm_user_version());
    # Note: kvm version < 2.4 use non-efi pxe files, and have problems when we
    # load new efi bios files on migration. So this hack is required to allow
    # live migration from qemu-2.2 to qemu-2.4, which is sometimes used when
    # upgrading from proxmox-ve-3.X to proxmox-ve 4.0
    my $use_old_bios_files = !min_version($version, 2, 4);

    return ($use_old_bios_files, $machine_type);
}
7665
# Return the size in bytes of the OVMF vars image selected for $conf.
# Dies if that image file does not exist.
sub get_efivars_size {
    my ($conf) = @_;

    my $arch = get_vm_arch($conf);

    my $efidisk;
    $efidisk = parse_drive('efidisk0', $conf->{efidisk0}) if $conf->{efidisk0};

    my $smm = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # only the second element (the vars image) is of interest here
    my @ovmf_files = get_ovmf_files($arch, $efidisk, $smm);
    my $ovmf_vars = $ovmf_files[1];

    if (! -f $ovmf_vars) {
        die "uefi vars image '$ovmf_vars' not found\n";
    }

    return -s $ovmf_vars;
}
7675
# Set the 'size' property of the 'efidisk0' entry in $conf to the size of the
# OVMF vars image. Does nothing if $conf has no 'efidisk0' entry.
sub update_efidisk_size {
    my ($conf) = @_;

    if (defined($conf->{efidisk0})) {
        my $efidisk = PVE::QemuServer::parse_drive('efidisk0', $conf->{efidisk0});
        $efidisk->{size} = get_efivars_size($conf);
        $conf->{efidisk0} = print_drive($efidisk);
    }

    return;
}
7687
# Set the 'size' property of the 'tpmstate0' entry in $conf to
# PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE.
#
# Does nothing if $conf has no 'tpmstate0' entry, mirroring the guard in
# update_efidisk_size(); without it an entry would be created from an
# undefined drive string.
sub update_tpmstate_size {
    my ($conf) = @_;

    return if !defined($conf->{tpmstate0});

    my $disk = PVE::QemuServer::parse_drive('tpmstate0', $conf->{tpmstate0});
    $disk->{size} = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
    $conf->{tpmstate0} = print_drive($disk);
}
7695
# Allocate a new EFI vars volume on storage $storeid for VM $vmid and fill it
# with the content of the OVMF vars image selected by $arch, $efidisk and $smm.
#
# Returns the list ($volid, $size_in_kib), where the size is the one reported
# for the new volume divided by 1024. Dies if the vars image does not exist.
sub create_efidisk($$$$$$$) {
    my ($storecfg, $storeid, $vmid, $fmt, $arch, $efidisk, $smm) = @_;

    # get_ovmf_files() returns a list; the vars image is its second element
    my $ovmf_vars = (get_ovmf_files($arch, $efidisk, $smm))[1];
    if (! -f $ovmf_vars) {
        die "EFI vars default image not found\n";
    }

    my $vars_size_b = -s $ovmf_vars;
    my $vars_size = PVE::Tools::convert_size($vars_size_b, 'b' => 'kb');

    my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $fmt, undef, $vars_size);
    PVE::Storage::activate_volumes($storecfg, [$volid]);

    qemu_img_convert($ovmf_vars, $volid, $vars_size_b, undef, 0);

    my ($allocated_size) = PVE::Storage::volume_size_info($storecfg, $volid, 3);

    return ($volid, $allocated_size/1024);
}
7712
# Query the iothreads of VM $vmid and return a hash reference mapping each
# iothread 'id' to its 'thread-id'.
sub vm_iothreads_list {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, 'query-iothreads');

    my %thread_id_of = map { ($_->{id}, $_->{"thread-id"}) } @$res;

    return \%thread_id_of;
}
7725
# Compute the SCSI controller layout for $drive.
#
# Returns the list ($maxdev, $controller, $controller_prefix):
#   $maxdev            - devices per controller: 7 if 'scsihw' is unset or
#                        starts with 'lsi', 1 for 'virtio-scsi-single',
#                        256 otherwise
#   $controller        - controller number, int(drive index / $maxdev)
#   $controller_prefix - "virtioscsi" for 'virtio-scsi-single', else "scsihw"
sub scsihw_infos {
    my ($conf, $drive) = @_;

    my $scsihw = $conf->{scsihw};
    my $is_single = $scsihw && $scsihw eq 'virtio-scsi-single';

    my $maxdev;
    if (!$scsihw || $scsihw =~ m/^lsi/) {
        $maxdev = 7;
    } else {
        $maxdev = $is_single ? 1 : 256;
    }

    my $controller = int($drive->{index} / $maxdev);
    my $controller_prefix = $is_single ? "virtioscsi" : "scsihw";

    return ($maxdev, $controller, $controller_prefix);
}
7746
# Map an 'ostype' value to a Windows major version number.
#
# Returns 5 for 'wxp', 'w2k3' and 'w2k', 6 for 'w2k8' and 'wvista', the
# number N for 'winN', and 0 for everything else (including an empty value).
sub windows_version {
    my ($ostype) = @_;

    return 0 if !$ostype;

    my %legacy_version = (
        wxp => 5,
        w2k3 => 5,
        w2k => 5,
        w2k8 => 6,
        wvista => 6,
    );
    return $legacy_version{$ostype} if exists($legacy_version{$ostype});

    my ($major) = $ostype =~ m/^win(\d+)$/;

    return defined($major) ? $major : 0;
}
7764
# Pick the disk format to use for a new volume on storage $storeid.
#
# If $format is not given, the format of $src_volname (as seen by
# qemu_img_format() for the target storage) is used as a hint; without a
# source volume name the storage default is returned directly. A format not
# listed as valid for the storage is replaced by the storage default.
sub resolve_dst_disk_format {
    my ($storecfg, $storeid, $src_volname, $format) = @_;

    my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);

    if (!$format) {
        return $defFormat if !$src_volname;

        # if no target format is specified, use the source disk format as hint
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        $format = qemu_img_format($scfg, $src_volname);
    }

    # test if requested format is supported - else use default
    for my $valid_format (@$validFormats) {
        return $format if $valid_format eq $format;
    }

    return $defFormat;
}
7784
# NOTE: if this logic changes, please update docs & possibly gui logic
#
# Select the storage for the VM state. The order of preference is:
#   1. 'vmstatestorage' from $conf, if set
#   2. a shared storage on which the VM has at least one disk
#   3. a local storage on which the VM has at least one disk
#   4. the storage named 'local'
# Within 2. and 3. storages with a 'path' (file based) are preferred.
sub find_vmstate_storage {
    my ($conf, $storecfg) = @_;

    return $conf->{vmstatestorage} if $conf->{vmstatestorage};

    my %candidate;

    foreach_storage_used_by_vm($conf, sub {
        my ($sid) = @_;

        my $scfg = PVE::Storage::storage_config($storecfg, $sid);
        my $kind = $scfg->{shared} ? 'shared' : 'local';

        # keep the first hit, but let file based storages override it
        $candidate{$kind} = $sid if !$candidate{$kind} || $scfg->{path};
    });

    return $candidate{shared} // $candidate{local} // 'local';
}
7808
# Generate a new UUID with the UUID module and return its string form.
sub generate_uuid {
    my $binary_uuid;
    UUID::generate($binary_uuid);

    my $uuid_str;
    UUID::unparse($binary_uuid, $uuid_str);

    return $uuid_str;
}
7815
# Return a freshly generated UUID as a "uuid=<value>" string.
sub generate_smbios1_uuid {
    my $uuid = generate_uuid();
    return "uuid=$uuid";
}
7819
# Send 'nbd-server-stop' to the monitor of VM $vmid and return its result.
sub nbd_stop {
    my ($vmid) = @_;

    return mon_cmd($vmid, 'nbd-server-stop');
}
7825
# Create (or truncate) the reboot trigger file of VM $vmid below
# /run/qemu-server. Dies if the file cannot be opened for writing.
sub create_reboot_request {
    my ($vmid) = @_;

    my $trigger_file = "/run/qemu-server/$vmid.reboot";

    open(my $fh, '>', $trigger_file)
        or die "failed to create reboot trigger file: $!\n";
    close($fh);
}
7832
# Remove the reboot trigger file of VM $vmid.
#
# Returns the result of unlink(): true if a file was removed, 0 if there was
# none. Dies if removal failed for any reason other than ENOENT.
sub clear_reboot_request {
    my ($vmid) = @_;

    my $removed = unlink("/run/qemu-server/$vmid.reboot");

    if (!$removed && $! != POSIX::ENOENT) {
        die "could not remove reboot request for $vmid: $!";
    }

    return $removed;
}
7844
# Build a boot order hash (device name => boot index) from the legacy boot
# string, i.e. $bootcfg->{legacy} or the default of the 'legacy' property in
# $boot_fmt.
#
# Each letter of the string gets a base index of position * 100 (starting at
# 100). 'd' numbers the CD-ROM drives, 'c' only the drive named by
# $conf->{bootdisk}, and 'n' the configured net devices.
sub bootorder_from_legacy {
    my ($conf, $bootcfg) = @_;

    my $boot = $bootcfg->{legacy} || $boot_fmt->{legacy}->{default};

    my @letters = split(//, $boot);
    my %next_index = map { ($letters[$_], ($_ + 1) * 100) } 0 .. $#letters;

    my $bootorder = {};

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        if (drive_is_cdrom($drive, 1)) {
            $bootorder->{$ds} = $next_index{d}++ if $next_index{d};
            return;
        }

        return if !$next_index{c};

        # every disk consumes an index, but only the boot disk is recorded
        my $index = $next_index{c}++;
        $bootorder->{$ds} = $index if $conf->{bootdisk} && $conf->{bootdisk} eq $ds;
    });

    if ($next_index{n}) {
        for my $i (0 .. $MAX_NETS - 1) {
            my $netname = "net$i";
            next if !$conf->{$netname};
            $bootorder->{$netname} = $next_index{n}++;
        }
    }

    return $bootorder;
}
7884
# Generate default device list for 'boot: order=' property. Matches legacy
# default boot order, but with explicit device names. This is important, since
# the fallback for when neither 'order' nor the old format is specified relies
# on 'bootorder_from_legacy' above, and it would be confusing if this diverges.
#
# Returns an array reference with, in this order, the first hard disk, the
# first CD-ROM and the first configured net device (each only if present).
sub get_default_bootdevices {
    my ($conf) = @_;

    my @devices;

    # harddisk (0) first, then cdrom (1)
    for my $want_cdrom (0, 1) {
        my $first = PVE::QemuServer::Drive::resolve_first_disk($conf, $want_cdrom);
        push @devices, $first if $first;
    }

    # network
    for my $i (0 .. $MAX_NETS - 1) {
        my $netname = "net$i";
        if ($conf->{$netname}) {
            push @devices, $netname;
            last;
        }
    }

    return \@devices;
}
7912
# Return the boot order of $conf as a hash reference (device name => index).
#
# Without a 'boot' property, or with a legacy style one, the result comes
# from bootorder_from_legacy(). With 'order=', devices are numbered in the
# listed sequence.
sub device_bootorder {
    my ($conf) = @_;

    return bootorder_from_legacy($conf) if !defined($conf->{boot});

    my $boot = parse_property_string($boot_fmt, $conf->{boot});

    if (!defined($boot) || $boot->{legacy}) {
        my $legacy_order = bootorder_from_legacy($conf, $boot);
        return $legacy_order;
    }

    my $bootorder = {};
    if ($boot->{order}) {
        my $index = 100; # start at 100 to allow user to insert devices before us with -args
        for my $dev (PVE::Tools::split_list($boot->{order})) {
            $bootorder->{$dev} = $index;
            $index++;
        }
    }

    return $bootorder;
}
7932
# Connect to the qmeventd socket and send the handshake that marks VM $vmid as
# being backed up.
#
# The connection is attempted up to five times, 25ms apart, as long as the
# failure is EINTR or EAGAIN; any other error, or running out of attempts,
# is fatal. Returns the socket handle, which the caller closes once the
# inhibit is no longer required.
sub register_qmeventd_handle {
    my ($vmid) = @_;

    my $peer = "/var/run/qmeventd.sock";
    my $attempts = 0;
    my $fh;

    while (1) {
        $attempts++;

        $fh = IO::Socket::UNIX->new(Peer => $peer, Blocking => 0, Timeout => 1);
        last if $fh;

        my $retryable = $! == EINTR || $! == EAGAIN;
        if (!$retryable) {
            die "unable to connect to qmeventd socket (vmid: $vmid) - $!\n";
        }
        if ($attempts > 4) {
            die "unable to connect to qmeventd socket (vmid: $vmid) - timeout "
                . "after $attempts retries\n";
        }

        usleep(25000);
    }

    # send handshake to mark VM as backing up
    print $fh to_json({vzdump => {vmid => "$vmid"}});

    # return handle to be closed later when inhibit is no longer required
    return $fh;
}
7960
7961# bash completion helper
7962
# Bash completion helper: return an array reference with the volume IDs of
# backup archives whose format is 'vma.<compressor>'.
#
# If the current value $cvalue starts with '<storage>:', only that storage is
# listed.
sub complete_backup_archives {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    my ($storeid) = $cvalue =~ m/^([^:]+):/;

    my $data = PVE::Storage::template_list($cfg, $storeid, 'backup');

    my @volids;
    for my $items (values %$data) {
        for my $item (@$items) {
            next if $item->{format} !~ m/^vma\.(${\PVE::Storage::Plugin::COMPRESSOR_RE})$/;
            next if !defined($item->{volid});
            push @volids, $item->{volid};
        }
    }

    return \@volids;
}
7986
# Return an array reference of VM IDs from vmstatus().
#
# With $running undefined all IDs are returned. Otherwise templates are
# skipped and only running ($running true) or only non-running ($running
# false) VMs are returned.
my $complete_vmid_full = sub {
    my ($running) = @_;

    my $idlist = vmstatus();

    return [ keys %$idlist ] if !defined($running);

    my @matching = grep {
        my $d = $idlist->{$_};
        !$d->{template}
            && ($running ? $d->{status} eq 'running' : $d->{status} ne 'running');
    } keys %$idlist;

    return \@matching;
};
8006
# Completion helper: all VM IDs, regardless of state.
sub complete_vmid {
    return $complete_vmid_full->();
}
8010
# Completion helper: IDs of non-template VMs that are not running.
sub complete_vmid_stopped {
    return $complete_vmid_full->(0);
}
8014
# Completion helper: IDs of non-template VMs that are running.
sub complete_vmid_running {
    return $complete_vmid_full->(1);
}
8018
# Completion helper: return an array reference with the IDs of all storages
# that pass storage_check_enabled() and have the 'images' content type.
sub complete_storage {

    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @usable = grep {
        PVE::Storage::storage_check_enabled($cfg, $_, undef, 1)
            && $ids->{$_}->{content}->{images}
    } keys %$ids;

    return \@usable;
}
8033
# Completion helper: like complete_storage(), but storage_check_enabled() is
# evaluated for the target node, taken from the second element of $all_args.
sub complete_migration_storage {
    my ($cmd, $param, $current_value, $all_args) = @_;

    my $targetnode = $all_args->[1];

    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @usable = grep {
        PVE::Storage::storage_check_enabled($cfg, $_, $targetnode, 1)
            && $ids->{$_}->{content}->{images}
    } keys %$ids;

    return \@usable;
}
8051
# Return true if the QMP status of VM $vmid is "paused".
#
# Errors from the config check or the status query are only warned about; in
# that case a false value is returned.
sub vm_is_paused {
    my ($vmid) = @_;

    my $qmpstatus;
    eval {
        PVE::QemuConfig::assert_config_exists_on_node($vmid);
        $qmpstatus = mon_cmd($vmid, "query-status");
    };
    warn "$@\n" if $@;

    return $qmpstatus if !$qmpstatus;
    return $qmpstatus->{status} eq "paused";
}
8061
# Verify that the storage of volume $vol allows the content type of that
# volume. Returns 1 on success and dies otherwise.
sub check_volume_storage_type {
    my ($storecfg, $vol) = @_;

    my ($storeid) = PVE::Storage::parse_volume_id($vol);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
    my ($vtype) = PVE::Storage::parse_volname($storecfg, $vol);

    if (!$scfg->{content}->{$vtype}) {
        die "storage '$storeid' does not support content-type '$vtype'\n";
    }

    return 1;
}
8074
80751;