]> git.proxmox.com Git - qemu-server.git/blob - PVE/QemuServer.pm
ec1cc78af1cb567f1e652eb75bf6e9242a9979fd
[qemu-server.git] / PVE / QemuServer.pm
1 package PVE::QemuServer;
2
3 use strict;
4 use warnings;
5
6 use Cwd 'abs_path';
7 use Digest::SHA;
8 use Fcntl ':flock';
9 use Fcntl;
10 use File::Basename;
11 use File::Copy qw(copy);
12 use File::Path;
13 use File::stat;
14 use Getopt::Long;
15 use IO::Dir;
16 use IO::File;
17 use IO::Handle;
18 use IO::Select;
19 use IO::Socket::UNIX;
20 use IPC::Open3;
21 use JSON;
22 use MIME::Base64;
23 use POSIX;
24 use Storable qw(dclone);
25 use Time::HiRes qw(gettimeofday usleep);
26 use URI::Escape;
27 use UUID;
28
29 use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file);
30 use PVE::CGroup;
31 use PVE::CpuSet;
32 use PVE::DataCenterConfig;
33 use PVE::Exception qw(raise raise_param_exc);
34 use PVE::Format qw(render_duration render_bytes);
35 use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
36 use PVE::INotify;
37 use PVE::JSONSchema qw(get_standard_option parse_property_string);
38 use PVE::ProcFSTools;
39 use PVE::PBSClient;
40 use PVE::RESTEnvironment qw(log_warn);
41 use PVE::RPCEnvironment;
42 use PVE::Storage;
43 use PVE::SysFSTools;
44 use PVE::Systemd;
45 use PVE::Tools qw(run_command file_read_firstline file_get_contents dir_glob_foreach get_host_arch $IPV6RE);
46
47 use PVE::QMPClient;
48 use PVE::QemuConfig;
49 use PVE::QemuServer::Helpers qw(min_version config_aware_timeout windows_version);
50 use PVE::QemuServer::Cloudinit;
51 use PVE::QemuServer::CGroup;
52 use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options);
53 use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom drive_is_read_only parse_drive print_drive);
54 use PVE::QemuServer::Machine;
55 use PVE::QemuServer::Memory;
56 use PVE::QemuServer::Monitor qw(mon_cmd);
57 use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
58 use PVE::QemuServer::USB qw(parse_usb_device);
59
# SDN support is optional: $have_sdn is 1 if PVE::Network::SDN::Zones could be loaded and
# stays undefined otherwise (the eval swallows the failed require).
my $have_sdn = eval {
    require PVE::Network::SDN::Zones;
    1;
};
65
66 my $EDK2_FW_BASE = '/usr/share/pve-edk2-firmware/';
67 my $OVMF = {
68 x86_64 => {
69 '4m-no-smm' => [
70 "$EDK2_FW_BASE/OVMF_CODE_4M.fd",
71 "$EDK2_FW_BASE/OVMF_VARS_4M.fd",
72 ],
73 '4m-no-smm-ms' => [
74 "$EDK2_FW_BASE/OVMF_CODE_4M.fd",
75 "$EDK2_FW_BASE/OVMF_VARS_4M.ms.fd",
76 ],
77 '4m' => [
78 "$EDK2_FW_BASE/OVMF_CODE_4M.secboot.fd",
79 "$EDK2_FW_BASE/OVMF_VARS_4M.fd",
80 ],
81 '4m-ms' => [
82 "$EDK2_FW_BASE/OVMF_CODE_4M.secboot.fd",
83 "$EDK2_FW_BASE/OVMF_VARS_4M.ms.fd",
84 ],
85 default => [
86 "$EDK2_FW_BASE/OVMF_CODE.fd",
87 "$EDK2_FW_BASE/OVMF_VARS.fd",
88 ],
89 },
90 aarch64 => {
91 default => [
92 "$EDK2_FW_BASE/AAVMF_CODE.fd",
93 "$EDK2_FW_BASE/AAVMF_VARS.fd",
94 ],
95 },
96 };
97
98 my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
99
100 # Note about locking: we use flock on the config file to protect against concurrent actions.
101 # Additionally, we have a 'lock' setting in the config file. This can be set to 'migrate',
102 # 'backup', 'snapshot' or 'rollback'. Most actions are not allowed when such a lock is set.
103 # But you can ignore this kind of lock with the --skiplock flag.
104
105 cfs_register_file('/qemu-server/',
106 \&parse_vm_config,
107 \&write_vm_config);
108
109 PVE::JSONSchema::register_standard_option('pve-qm-stateuri', {
110 description => "Some command save/restore state from this location.",
111 type => 'string',
112 maxLength => 128,
113 optional => 1,
114 });
115
116 PVE::JSONSchema::register_standard_option('pve-qemu-machine', {
117 description => "Specifies the Qemu machine type.",
118 type => 'string',
119 pattern => '(pc|pc(-i440fx)?-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|q35|pc-q35-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|virt(?:-\d+(\.\d+)+)?(\+pve\d+)?)',
120 maxLength => 40,
121 optional => 1,
122 });
123
124 #no warnings 'redefine';
125
# Cached result of PVE::INotify::nodename(); filled in by the first call to nodename().
my $nodename_cache;

# Return the node name as reported by PVE::INotify::nodename().
# The lookup is done only while the cache is still undefined; afterwards the cached value
# is returned.
sub nodename {
    if (!defined($nodename_cache)) {
        $nodename_cache = PVE::INotify::nodename();
    }
    return $nodename_cache;
}
131
132 my $watchdog_fmt = {
133 model => {
134 default_key => 1,
135 type => 'string',
136 enum => [qw(i6300esb ib700)],
137 description => "Watchdog type to emulate.",
138 default => 'i6300esb',
139 optional => 1,
140 },
141 action => {
142 type => 'string',
143 enum => [qw(reset shutdown poweroff pause debug none)],
144 description => "The action to perform if after activation the guest fails to poll the watchdog in time.",
145 optional => 1,
146 },
147 };
148 PVE::JSONSchema::register_format('pve-qm-watchdog', $watchdog_fmt);
149
150 my $agent_fmt = {
151 enabled => {
152 description => "Enable/disable communication with a Qemu Guest Agent (QGA) running in the VM.",
153 type => 'boolean',
154 default => 0,
155 default_key => 1,
156 },
157 fstrim_cloned_disks => {
158 description => "Run fstrim after moving a disk or migrating the VM.",
159 type => 'boolean',
160 optional => 1,
161 default => 0
162 },
163 type => {
164 description => "Select the agent type",
165 type => 'string',
166 default => 'virtio',
167 optional => 1,
168 enum => [qw(virtio isa)],
169 },
170 };
171
172 my $vga_fmt = {
173 type => {
174 description => "Select the VGA type.",
175 type => 'string',
176 default => 'std',
177 optional => 1,
178 default_key => 1,
179 enum => [qw(cirrus qxl qxl2 qxl3 qxl4 none serial0 serial1 serial2 serial3 std virtio virtio-gl vmware)],
180 },
181 memory => {
182 description => "Sets the VGA memory (in MiB). Has no effect with serial display.",
183 type => 'integer',
184 optional => 1,
185 minimum => 4,
186 maximum => 512,
187 },
188 };
189
190 my $ivshmem_fmt = {
191 size => {
192 type => 'integer',
193 minimum => 1,
194 description => "The size of the file in MB.",
195 },
196 name => {
197 type => 'string',
198 pattern => '[a-zA-Z0-9\-]+',
199 optional => 1,
200 format_description => 'string',
201 description => "The name of the file. Will be prefixed with 'pve-shm-'. Default is the VMID. Will be deleted when the VM is stopped.",
202 },
203 };
204
205 my $audio_fmt = {
206 device => {
207 type => 'string',
208 enum => [qw(ich9-intel-hda intel-hda AC97)],
209 description => "Configure an audio device."
210 },
211 driver => {
212 type => 'string',
213 enum => ['spice', 'none'],
214 default => 'spice',
215 optional => 1,
216 description => "Driver backend for the audio device."
217 },
218 };
219
220 my $spice_enhancements_fmt = {
221 foldersharing => {
222 type => 'boolean',
223 optional => 1,
224 default => '0',
225 description => "Enable folder sharing via SPICE. Needs Spice-WebDAV daemon installed in the VM."
226 },
227 videostreaming => {
228 type => 'string',
229 enum => ['off', 'all', 'filter'],
230 default => 'off',
231 optional => 1,
232 description => "Enable video streaming. Uses compression for detected video streams."
233 },
234 };
235
236 my $rng_fmt = {
237 source => {
238 type => 'string',
239 enum => ['/dev/urandom', '/dev/random', '/dev/hwrng'],
240 default_key => 1,
241 description => "The file on the host to gather entropy from. In most cases '/dev/urandom'"
242 ." should be preferred over '/dev/random' to avoid entropy-starvation issues on the"
243 ." host. Using urandom does *not* decrease security in any meaningful way, as it's"
244 ." still seeded from real entropy, and the bytes provided will most likely be mixed"
245 ." with real entropy on the guest as well. '/dev/hwrng' can be used to pass through"
246 ." a hardware RNG from the host.",
247 },
248 max_bytes => {
249 type => 'integer',
250 description => "Maximum bytes of entropy allowed to get injected into the guest every"
251 ." 'period' milliseconds. Prefer a lower value when using '/dev/random' as source. Use"
252 ." `0` to disable limiting (potentially dangerous!).",
253 optional => 1,
254
255 # default is 1 KiB/s, provides enough entropy to the guest to avoid boot-starvation issues
256 # (e.g. systemd etc...) while allowing no chance of overwhelming the host, provided we're
257 # reading from /dev/urandom
258 default => 1024,
259 },
260 period => {
261 type => 'integer',
262 description => "Every 'period' milliseconds the entropy-injection quota is reset, allowing"
263 ." the guest to retrieve another 'max_bytes' of entropy.",
264 optional => 1,
265 default => 1000,
266 },
267 };
268
269 my $meta_info_fmt = {
270 'ctime' => {
271 type => 'integer',
272 description => "The guest creation timestamp as UNIX epoch time",
273 minimum => 0,
274 optional => 1,
275 },
276 'creation-qemu' => {
277 type => 'string',
278 description => "The QEMU (machine) version from the time this VM was created.",
279 pattern => '\d+(\.\d+)+',
280 optional => 1,
281 },
282 };
283
284 my $confdesc = {
285 onboot => {
286 optional => 1,
287 type => 'boolean',
288 description => "Specifies whether a VM will be started during system bootup.",
289 default => 0,
290 },
291 autostart => {
292 optional => 1,
293 type => 'boolean',
294 description => "Automatic restart after crash (currently ignored).",
295 default => 0,
296 },
297 hotplug => {
298 optional => 1,
299 type => 'string', format => 'pve-hotplug-features',
300 description => "Selectively enable hotplug features. This is a comma separated list of"
301 ." hotplug features: 'network', 'disk', 'cpu', 'memory', 'usb' and 'cloudinit'. Use '0' to disable"
302 ." hotplug completely. Using '1' as value is an alias for the default `network,disk,usb`.",
303 default => 'network,disk,usb',
304 },
305 reboot => {
306 optional => 1,
307 type => 'boolean',
308 description => "Allow reboot. If set to '0' the VM exit on reboot.",
309 default => 1,
310 },
311 lock => {
312 optional => 1,
313 type => 'string',
314 description => "Lock/unlock the VM.",
315 enum => [qw(backup clone create migrate rollback snapshot snapshot-delete suspending suspended)],
316 },
317 cpulimit => {
318 optional => 1,
319 type => 'number',
320 description => "Limit of CPU usage.",
321 verbose_description => "Limit of CPU usage.\n\nNOTE: If the computer has 2 CPUs, it has"
322 ." total of '2' CPU time. Value '0' indicates no CPU limit.",
323 minimum => 0,
324 maximum => 128,
325 default => 0,
326 },
327 cpuunits => {
328 optional => 1,
329 type => 'integer',
330 description => "CPU weight for a VM, will be clamped to [1, 10000] in cgroup v2.",
331 verbose_description => "CPU weight for a VM. Argument is used in the kernel fair scheduler."
332 ." The larger the number is, the more CPU time this VM gets. Number is relative to"
333 ." weights of all the other running VMs.",
334 minimum => 1,
335 maximum => 262144,
336 default => 'cgroup v1: 1024, cgroup v2: 100',
337 },
338 memory => {
339 optional => 1,
340 type => 'integer',
341 description => "Amount of RAM for the VM in MB. This is the maximum available memory when"
342 ." you use the balloon device.",
343 minimum => 16,
344 default => 512,
345 },
346 balloon => {
347 optional => 1,
348 type => 'integer',
349 description => "Amount of target RAM for the VM in MB. Using zero disables the ballon driver.",
350 minimum => 0,
351 },
352 shares => {
353 optional => 1,
354 type => 'integer',
355 description => "Amount of memory shares for auto-ballooning. The larger the number is, the"
356 ." more memory this VM gets. Number is relative to weights of all other running VMs."
357 ." Using zero disables auto-ballooning. Auto-ballooning is done by pvestatd.",
358 minimum => 0,
359 maximum => 50000,
360 default => 1000,
361 },
362 keyboard => {
363 optional => 1,
364 type => 'string',
365 description => "Keyboard layout for VNC server. This option is generally not required and"
366 ." is often better handled from within the guest OS.",
367 enum => PVE::Tools::kvmkeymaplist(),
368 default => undef,
369 },
370 name => {
371 optional => 1,
372 type => 'string', format => 'dns-name',
373 description => "Set a name for the VM. Only used on the configuration web interface.",
374 },
375 scsihw => {
376 optional => 1,
377 type => 'string',
378 description => "SCSI controller model",
379 enum => [qw(lsi lsi53c810 virtio-scsi-pci virtio-scsi-single megasas pvscsi)],
380 default => 'lsi',
381 },
382 description => {
383 optional => 1,
384 type => 'string',
385 description => "Description for the VM. Shown in the web-interface VM's summary."
386 ." This is saved as comment inside the configuration file.",
387 maxLength => 1024 * 8,
388 },
389 ostype => {
390 optional => 1,
391 type => 'string',
392 enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 win10 win11 l24 l26 solaris)],
393 description => "Specify guest operating system.",
394 verbose_description => <<EODESC,
395 Specify guest operating system. This is used to enable special
396 optimization/features for specific operating systems:
397
398 [horizontal]
399 other;; unspecified OS
400 wxp;; Microsoft Windows XP
401 w2k;; Microsoft Windows 2000
402 w2k3;; Microsoft Windows 2003
403 w2k8;; Microsoft Windows 2008
404 wvista;; Microsoft Windows Vista
405 win7;; Microsoft Windows 7
406 win8;; Microsoft Windows 8/2012/2012r2
407 win10;; Microsoft Windows 10/2016/2019
408 win11;; Microsoft Windows 11/2022
409 l24;; Linux 2.4 Kernel
410 l26;; Linux 2.6 - 5.X Kernel
411 solaris;; Solaris/OpenSolaris/OpenIndiania kernel
412 EODESC
413 },
414 boot => {
415 optional => 1,
416 type => 'string', format => 'pve-qm-boot',
417 description => "Specify guest boot order. Use the 'order=' sub-property as usage with no"
418 ." key or 'legacy=' is deprecated.",
419 },
420 bootdisk => {
421 optional => 1,
422 type => 'string', format => 'pve-qm-bootdisk',
423 description => "Enable booting from specified disk. Deprecated: Use 'boot: order=foo;bar' instead.",
424 pattern => '(ide|sata|scsi|virtio)\d+',
425 },
426 smp => {
427 optional => 1,
428 type => 'integer',
429 description => "The number of CPUs. Please use option -sockets instead.",
430 minimum => 1,
431 default => 1,
432 },
433 sockets => {
434 optional => 1,
435 type => 'integer',
436 description => "The number of CPU sockets.",
437 minimum => 1,
438 default => 1,
439 },
440 cores => {
441 optional => 1,
442 type => 'integer',
443 description => "The number of cores per socket.",
444 minimum => 1,
445 default => 1,
446 },
447 numa => {
448 optional => 1,
449 type => 'boolean',
450 description => "Enable/disable NUMA.",
451 default => 0,
452 },
453 hugepages => {
454 optional => 1,
455 type => 'string',
456 description => "Enable/disable hugepages memory.",
457 enum => [qw(any 2 1024)],
458 },
459 keephugepages => {
460 optional => 1,
461 type => 'boolean',
462 default => 0,
463 description => "Use together with hugepages. If enabled, hugepages will not not be deleted"
464 ." after VM shutdown and can be used for subsequent starts.",
465 },
466 vcpus => {
467 optional => 1,
468 type => 'integer',
469 description => "Number of hotplugged vcpus.",
470 minimum => 1,
471 default => 0,
472 },
473 acpi => {
474 optional => 1,
475 type => 'boolean',
476 description => "Enable/disable ACPI.",
477 default => 1,
478 },
479 agent => {
480 optional => 1,
481 description => "Enable/disable communication with the Qemu Guest Agent and its properties.",
482 type => 'string',
483 format => $agent_fmt,
484 },
485 kvm => {
486 optional => 1,
487 type => 'boolean',
488 description => "Enable/disable KVM hardware virtualization.",
489 default => 1,
490 },
491 tdf => {
492 optional => 1,
493 type => 'boolean',
494 description => "Enable/disable time drift fix.",
495 default => 0,
496 },
497 localtime => {
498 optional => 1,
499 type => 'boolean',
500 description => "Set the real time clock (RTC) to local time. This is enabled by default if"
501 ." the `ostype` indicates a Microsoft Windows OS.",
502 },
503 freeze => {
504 optional => 1,
505 type => 'boolean',
506 description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
507 },
508 vga => {
509 optional => 1,
510 type => 'string', format => $vga_fmt,
511 description => "Configure the VGA hardware.",
512 verbose_description => "Configure the VGA Hardware. If you want to use high resolution"
513 ." modes (>= 1280x1024x16) you may need to increase the vga memory option. Since QEMU"
514 ." 2.9 the default VGA display type is 'std' for all OS types besides some Windows"
515 ." versions (XP and older) which use 'cirrus'. The 'qxl' option enables the SPICE"
516 ." display server. For win* OS you can select how many independent displays you want,"
517 ." Linux guests can add displays them self.\nYou can also run without any graphic card,"
518 ." using a serial device as terminal.",
519 },
520 watchdog => {
521 optional => 1,
522 type => 'string', format => 'pve-qm-watchdog',
523 description => "Create a virtual hardware watchdog device.",
524 verbose_description => "Create a virtual hardware watchdog device. Once enabled (by a guest"
525 ." action), the watchdog must be periodically polled by an agent inside the guest or"
526 ." else the watchdog will reset the guest (or execute the respective action specified)",
527 },
528 startdate => {
529 optional => 1,
530 type => 'string',
531 typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
532 description => "Set the initial date of the real time clock. Valid format for date are:"
533 ."'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
534 pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
535 default => 'now',
536 },
537 startup => get_standard_option('pve-startup-order'),
538 template => {
539 optional => 1,
540 type => 'boolean',
541 description => "Enable/disable Template.",
542 default => 0,
543 },
544 args => {
545 optional => 1,
546 type => 'string',
547 description => "Arbitrary arguments passed to kvm.",
548 verbose_description => <<EODESCR,
549 Arbitrary arguments passed to kvm, for example:
550
551 args: -no-reboot -no-hpet
552
553 NOTE: this option is for experts only.
554 EODESCR
555 },
556 tablet => {
557 optional => 1,
558 type => 'boolean',
559 default => 1,
560 description => "Enable/disable the USB tablet device.",
561 verbose_description => "Enable/disable the USB tablet device. This device is usually needed"
562 ." to allow absolute mouse positioning with VNC. Else the mouse runs out of sync with"
563 ." normal VNC clients. If you're running lots of console-only guests on one host, you"
564 ." may consider disabling this to save some context switches. This is turned off by"
565 ." default if you use spice (`qm set <vmid> --vga qxl`).",
566 },
567 migrate_speed => {
568 optional => 1,
569 type => 'integer',
570 description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
571 minimum => 0,
572 default => 0,
573 },
574 migrate_downtime => {
575 optional => 1,
576 type => 'number',
577 description => "Set maximum tolerated downtime (in seconds) for migrations.",
578 minimum => 0,
579 default => 0.1,
580 },
581 cdrom => {
582 optional => 1,
583 type => 'string', format => 'pve-qm-ide',
584 typetext => '<volume>',
585 description => "This is an alias for option -ide2",
586 },
587 cpu => {
588 optional => 1,
589 description => "Emulated CPU type.",
590 type => 'string',
591 format => 'pve-vm-cpu-conf',
592 },
593 parent => get_standard_option('pve-snapshot-name', {
594 optional => 1,
595 description => "Parent snapshot name. This is used internally, and should not be modified.",
596 }),
597 snaptime => {
598 optional => 1,
599 description => "Timestamp for snapshots.",
600 type => 'integer',
601 minimum => 0,
602 },
603 vmstate => {
604 optional => 1,
605 type => 'string', format => 'pve-volume-id',
606 description => "Reference to a volume which stores the VM state. This is used internally"
607 ." for snapshots.",
608 },
609 vmstatestorage => get_standard_option('pve-storage-id', {
610 description => "Default storage for VM state volumes/files.",
611 optional => 1,
612 }),
613 runningmachine => get_standard_option('pve-qemu-machine', {
614 description => "Specifies the QEMU machine type of the running vm. This is used internally"
615 ." for snapshots.",
616 }),
617 runningcpu => {
618 description => "Specifies the QEMU '-cpu' parameter of the running vm. This is used"
619 ." internally for snapshots.",
620 optional => 1,
621 type => 'string',
622 pattern => $PVE::QemuServer::CPUConfig::qemu_cmdline_cpu_re,
623 format_description => 'QEMU -cpu parameter'
624 },
625 machine => get_standard_option('pve-qemu-machine'),
626 arch => {
627 description => "Virtual processor architecture. Defaults to the host.",
628 optional => 1,
629 type => 'string',
630 enum => [qw(x86_64 aarch64)],
631 },
632 smbios1 => {
633 description => "Specify SMBIOS type 1 fields.",
634 type => 'string', format => 'pve-qm-smbios1',
635 maxLength => 512,
636 optional => 1,
637 },
638 protection => {
639 optional => 1,
640 type => 'boolean',
641 description => "Sets the protection flag of the VM. This will disable the remove VM and"
642 ." remove disk operations.",
643 default => 0,
644 },
645 bios => {
646 optional => 1,
647 type => 'string',
648 enum => [ qw(seabios ovmf) ],
649 description => "Select BIOS implementation.",
650 default => 'seabios',
651 },
652 vmgenid => {
653 type => 'string',
654 pattern => '(?:[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}|[01])',
655 format_description => 'UUID',
656 description => "Set VM Generation ID. Use '1' to autogenerate on create or update, pass '0'"
657 ." to disable explicitly.",
658 verbose_description => "The VM generation ID (vmgenid) device exposes a 128-bit integer"
659 ." value identifier to the guest OS. This allows to notify the guest operating system"
660 ." when the virtual machine is executed with a different configuration (e.g. snapshot"
661 ." execution or creation from a template). The guest operating system notices the"
662 ." change, and is then able to react as appropriate by marking its copies of"
663 ." distributed databases as dirty, re-initializing its random number generator, etc.\n"
664 ."Note that auto-creation only works when done through API/CLI create or update methods"
665 .", but not when manually editing the config file.",
666 default => "1 (autogenerated)",
667 optional => 1,
668 },
669 hookscript => {
670 type => 'string',
671 format => 'pve-volume-id',
672 optional => 1,
673 description => "Script that will be executed during various steps in the vms lifetime.",
674 },
675 ivshmem => {
676 type => 'string',
677 format => $ivshmem_fmt,
678 description => "Inter-VM shared memory. Useful for direct communication between VMs, or to"
679 ." the host.",
680 optional => 1,
681 },
682 audio0 => {
683 type => 'string',
684 format => $audio_fmt,
685 description => "Configure a audio device, useful in combination with QXL/Spice.",
686 optional => 1
687 },
688 spice_enhancements => {
689 type => 'string',
690 format => $spice_enhancements_fmt,
691 description => "Configure additional enhancements for SPICE.",
692 optional => 1
693 },
694 tags => {
695 type => 'string', format => 'pve-tag-list',
696 description => 'Tags of the VM. This is only meta information.',
697 optional => 1,
698 },
699 rng0 => {
700 type => 'string',
701 format => $rng_fmt,
702 description => "Configure a VirtIO-based Random Number Generator.",
703 optional => 1,
704 },
705 meta => {
706 type => 'string',
707 format => $meta_info_fmt,
708 description => "Some (read-only) meta-information about this guest.",
709 optional => 1,
710 },
711 affinity => {
712 type => 'string', format => 'pve-cpuset',
713 description => "List of host cores used to execute guest processes.",
714 optional => 1,
715 },
716 };
717
# Property-string format for the 'cicustom' option (registered as 'pve-qm-cicustom').
# Every key is optional and takes a volume ID ('pve-volume-id') pointing at a custom file
# for the respective kind of cloud-init data: meta, network, user or vendor.
my $cicustom_fmt = {
    meta => {
        type => 'string',
        optional => 1,
        # Keep both parts single-quoted: mixing in double quotes here puts a literal '"',
        # a newline and '."' into the description text.
        description => 'Specify a custom file containing all meta data passed to the VM via'
            .' cloud-init. This is provider specific meaning configdrive2 and nocloud differ.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    network => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all network data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    user => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all user data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    vendor => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all vendor data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
};
752 PVE::JSONSchema::register_format('pve-qm-cicustom', $cicustom_fmt);
753
# Schema of the cloud-init related VM options. The per-interface 'ipconfigN' entries are
# added to this hash later on, and the whole hash is then merged into $confdesc.
my $confdesc_cloudinit = {
    citype => {
        optional => 1,
        type => 'string',
        description => 'Specifies the cloud-init configuration format. The default depends on the'
            .' configured operating system type (`ostype`). We use the `nocloud` format for Linux,'
            .' and `configdrive2` for windows.',
        enum => ['configdrive2', 'nocloud', 'opennebula'],
    },
    ciuser => {
        optional => 1,
        type => 'string',
        description => "cloud-init: User name to change ssh keys and password for instead of the"
            ." image's configured default user.",
    },
    cipassword => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Password to assign the user. Using this is generally not'
            .' recommended. Use ssh keys instead. Also note that older cloud-init versions do not'
            .' support hashed passwords.',
    },
    cicustom => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Specify custom files to replace the automatically generated'
            .' ones at start.',
        format => 'pve-qm-cicustom',
    },
    searchdomain => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Sets DNS search domains for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    nameserver => {
        optional => 1,
        type => 'string', format => 'address-list',
        description => 'cloud-init: Sets DNS server IP address for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    sshkeys => {
        optional => 1,
        type => 'string',
        format => 'urlencoded',
        description => "cloud-init: Setup public SSH keys (one key per line, OpenSSH format).",
    },
};
804
805 # what about other qemu settings ?
806 #cpu => 'string',
807 #machine => 'string',
808 #fda => 'file',
809 #fdb => 'file',
810 #mtdblock => 'file',
811 #sd => 'file',
812 #pflash => 'file',
813 #snapshot => 'bool',
814 #bootp => 'file',
815 ##tftp => 'dir',
816 ##smb => 'dir',
817 #kernel => 'file',
818 #append => 'string',
819 #initrd => 'file',
820 ##soundhw => 'string',
821
# Publish every option that is in $confdesc at this point as the standard option
# 'pve-qm-<option name>'. Keys added to $confdesc after this loop are not covered by it.
for my $opt_name (keys %$confdesc) {
    my $opt_schema = $confdesc->{$opt_name};
    PVE::JSONSchema::register_standard_option("pve-qm-$opt_name", $opt_schema);
}
825
826 my $MAX_USB_DEVICES = 5;
827 my $MAX_NETS = 32;
828 my $MAX_SERIAL_PORTS = 4;
829 my $MAX_PARALLEL_PORTS = 3;
830 my $MAX_NUMA = 8;
831
832 my $numa_fmt = {
833 cpus => {
834 type => "string",
835 pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
836 description => "CPUs accessing this NUMA node.",
837 format_description => "id[-id];...",
838 },
839 memory => {
840 type => "number",
841 description => "Amount of memory this NUMA node provides.",
842 optional => 1,
843 },
844 hostnodes => {
845 type => "string",
846 pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
847 description => "Host NUMA nodes to use.",
848 format_description => "id[-id];...",
849 optional => 1,
850 },
851 policy => {
852 type => 'string',
853 enum => [qw(preferred bind interleave)],
854 description => "NUMA allocation policy.",
855 optional => 1,
856 },
857 };
858 PVE::JSONSchema::register_format('pve-qm-numanode', $numa_fmt);
859 my $numadesc = {
860 optional => 1,
861 type => 'string', format => $numa_fmt,
862 description => "NUMA topology.",
863 };
864 PVE::JSONSchema::register_standard_option("pve-qm-numanode", $numadesc);
865
# Add the options numa0 .. numa<$MAX_NUMA - 1> to the schema; all share the same $numadesc.
$confdesc->{"numa$_"} = $numadesc for 0 .. $MAX_NUMA - 1;
869
870 my $nic_model_list = [
871 'e1000',
872 'e1000-82540em',
873 'e1000-82544gc',
874 'e1000-82545em',
875 'e1000e',
876 'i82551',
877 'i82557b',
878 'i82559er',
879 'ne2k_isa',
880 'ne2k_pci',
881 'pcnet',
882 'rtl8139',
883 'virtio',
884 'vmxnet3',
885 ];
886 my $nic_model_list_txt = join(' ', sort @$nic_model_list);
887
888 my $net_fmt_bridge_descr = <<__EOD__;
889 Bridge to attach the network device to. The Proxmox VE standard bridge
890 is called 'vmbr0'.
891
892 If you do not specify a bridge, we create a kvm user (NATed) network
893 device, which provides DHCP and DNS services. The following addresses
894 are used:
895
896 10.0.2.2 Gateway
897 10.0.2.3 DNS Server
898 10.0.2.4 SMB Server
899
900 The DHCP server assign addresses to the guest starting from 10.0.2.15.
901 __EOD__
902
903 my $net_fmt = {
904 macaddr => get_standard_option('mac-addr', {
905 description => "MAC address. That address must be unique withing your network. This is"
906 ." automatically generated if not specified.",
907 }),
908 model => {
909 type => 'string',
910 description => "Network Card Model. The 'virtio' model provides the best performance with"
911 ." very low CPU overhead. If your guest does not support this driver, it is usually"
912 ." best to use 'e1000'.",
913 enum => $nic_model_list,
914 default_key => 1,
915 },
916 (map { $_ => { keyAlias => 'model', alias => 'macaddr' }} @$nic_model_list),
917 bridge => get_standard_option('pve-bridge-id', {
918 description => $net_fmt_bridge_descr,
919 optional => 1,
920 }),
921 queues => {
922 type => 'integer',
923 minimum => 0, maximum => 16,
924 description => 'Number of packet queues to be used on the device.',
925 optional => 1,
926 },
927 rate => {
928 type => 'number',
929 minimum => 0,
930 description => "Rate limit in mbps (megabytes per second) as floating point number.",
931 optional => 1,
932 },
933 tag => {
934 type => 'integer',
935 minimum => 1, maximum => 4094,
936 description => 'VLAN tag to apply to packets on this interface.',
937 optional => 1,
938 },
939 trunks => {
940 type => 'string',
941 pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
942 description => 'VLAN trunks to pass through this interface.',
943 format_description => 'vlanid[;vlanid...]',
944 optional => 1,
945 },
946 firewall => {
947 type => 'boolean',
948 description => 'Whether this interface should be protected by the firewall.',
949 optional => 1,
950 },
951 link_down => {
952 type => 'boolean',
953 description => 'Whether this interface should be disconnected (like pulling the plug).',
954 optional => 1,
955 },
956 mtu => {
957 type => 'integer',
958 minimum => 1, maximum => 65520,
959 description => "Force MTU, for VirtIO only. Set to '1' to use the bridge MTU",
960 optional => 1,
961 },
962 };
963
964 my $netdesc = {
965 optional => 1,
966 type => 'string', format => $net_fmt,
967 description => "Specify network devices.",
968 };
969
970 PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
971
# Property-string format of the cloud-init 'ipconfig<N>' options. Each gateway
# key 'requires' the address key of the same IP family.
my $ipconfig_fmt = {
    ip => {
        description => 'IPv4 address in CIDR format.',
        type => 'string',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        default => 'dhcp',
        optional => 1,
    },
    gw => {
        description => 'Default gateway for IPv4 traffic.',
        type => 'string',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        requires => 'ip',
        optional => 1,
    },
    ip6 => {
        description => 'IPv6 address in CIDR format.',
        type => 'string',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        default => 'dhcp',
        optional => 1,
    },
    gw6 => {
        description => 'Default gateway for IPv6 traffic.',
        type => 'string',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        requires => 'ip6',
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-qm-ipconfig', $ipconfig_fmt);
# Schema entry used for the cloud-init 'ipconfig<N>' config keys: an optional
# property string in the 'pve-qm-ipconfig' format.
my $ipconfigdesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-ipconfig',
    description => <<'EODESCR',
cloud-init: Specify IP addresses and gateways for the corresponding interface.

IP addresses use CIDR notation, gateways are optional but need an IP of the same type specified.

The special string 'dhcp' can be used for IP addresses to use DHCP, in which case no explicit
gateway should be provided.
For IPv6 the special string 'auto' can be used to use stateless autoconfiguration. This requires
cloud-init 19.4 or newer.

If cloud-init is enabled and neither an IPv4 nor an IPv6 address is specified, it defaults to using
dhcp on IPv4.
EODESCR
};
# The standard option must carry the ipconfig description; it used to be
# registered with the network device description ($netdesc) by mistake.
PVE::JSONSchema::register_standard_option("pve-qm-ipconfig", $ipconfigdesc);
1025
# Add one 'net<N>' option and one cloud-init 'ipconfig<N>' option for every
# index below $MAX_NETS.
for my $i (0 .. $MAX_NETS - 1) {
    $confdesc->{"net$i"} = $netdesc;
    $confdesc_cloudinit->{"ipconfig$i"} = $ipconfigdesc;
}

# Copy every cloud-init option into the main option table.
$confdesc->{$_} = $confdesc_cloudinit->{$_} for keys %$confdesc_cloudinit;
1034
PVE::JSONSchema::register_format('pve-cpuset', \&pve_verify_cpuset);

# Validator for the 'pve-cpuset' format.
#
# Parses $set_text with PVE::CpuSet::parse_cpuset() and returns the short
# string form of the resulting set. When parsing fails it returns nothing if
# $noerr is set and dies otherwise.
sub pve_verify_cpuset {
    my ($set_text, $noerr) = @_;

    # only the member list is needed; the first returned value is ignored
    my (undef, $members) = eval { PVE::CpuSet::parse_cpuset($set_text) };

    if (!$@) {
        return PVE::CpuSet->new($members)->short_string();
    }

    return if $noerr;
    die "unable to parse cpuset option\n";
}
1048
PVE::JSONSchema::register_format('pve-volume-id-or-qm-path', \&verify_volume_id_or_qm_path);

# Validator for the 'pve-volume-id-or-qm-path' format: accepts the keywords
# 'none' and 'cdrom' as they are and hands everything else to
# verify_volume_id_or_absolute_path().
sub verify_volume_id_or_qm_path {
    my ($volid, $noerr) = @_;

    for my $keyword (qw(none cdrom)) {
        return $volid if $volid eq $keyword;
    }

    return verify_volume_id_or_absolute_path($volid, $noerr);
}
1057
PVE::JSONSchema::register_format('pve-volume-id-or-absolute-path', \&verify_volume_id_or_absolute_path);

# Validator for the 'pve-volume-id-or-absolute-path' format.
#
# Values starting with '/' are returned unchanged. Anything else is checked
# against the 'pve-volume-id' format and the checked value is returned. On a
# failed check it returns nothing if $noerr is set and re-throws the error
# otherwise.
sub verify_volume_id_or_absolute_path {
    my ($volid, $noerr) = @_;

    return $volid if $volid =~ m{^/};

    my $checked = eval { PVE::JSONSchema::check_format('pve-volume-id', $volid, '') };
    if (my $err = $@) {
        return if $noerr;
        die $err;
    }

    return $checked;
}
1071
# Property-string format of the 'usb<N>' options. 'host' is the default key,
# so its value may be given without the 'host=' prefix.
my $usb_fmt = {
    host => {
        default_key => 1,
        type => 'string', format => 'pve-qm-usb-device',
        format_description => 'HOSTUSBDEVICE|spice',
        description => <<EODESCR,
The Host USB device or port or the value 'spice'. HOSTUSBDEVICE syntax is:

'bus-port(.port)*' (decimal numbers) or
'vendor_id:product_id' (hexadecimal numbers) or
'spice'

You can use the 'lsusb -t' command to list existing usb devices.

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

The value 'spice' can be used to add a usb redirection devices for spice.
EODESCR
    },
    usb3 => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether the given host option is a USB3 device or port.",
        default => 0,
    },
};
1099
# Schema entry used for the 'usb<N>' config keys, validated against $usb_fmt.
my $usbdesc = {
    type => 'string',
    format => $usb_fmt,
    optional => 1,
    description => "Configure an USB device (n is 0 to 4).",
};
PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc);
1106
# Schema entry used for the 'serial<N>' config keys. The pattern accepts a
# path below /dev/ or the literal word 'socket'.
my $serialdesc = {
    type => 'string',
    pattern => '(/dev/.+|socket)',
    optional => 1,
    description => "Create a serial device inside the VM (n is 0 to 3)",
    verbose_description => <<EODESCR,
Create a serial device inside the VM (n is 0 to 3), and pass through a
host serial device (i.e. /dev/ttyS0), or create a unix socket on the
host side (use 'qm terminal' to open a terminal connection).

NOTE: If you pass through a host serial device, it is no longer possible to migrate such machines -
use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1123
# Schema entry used for the 'parallel<N>' config keys. The pattern accepts
# /dev/parport<N> and /dev/usb/lp<N> device nodes.
my $paralleldesc = {
    type => 'string',
    pattern => '/dev/parport\d+|/dev/usb/lp\d+',
    optional => 1,
    description => "Map host parallel devices (n is 0 to 2).",
    verbose_description => <<EODESCR,
Map host parallel devices (n is 0 to 2).

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1138
# Fill the option table with the indexed device options, one entry per index
# below the respective limit.
for my $i (0 .. $MAX_PARALLEL_PORTS - 1) {
    $confdesc->{"parallel$i"} = $paralleldesc;
}

for my $i (0 .. $MAX_SERIAL_PORTS - 1) {
    $confdesc->{"serial$i"} = $serialdesc;
}

for my $i (0 .. $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES - 1) {
    $confdesc->{"hostpci$i"} = $PVE::QemuServer::PCI::hostpcidesc;
}

# Drive options are taken over, key by key, from the Drive module.
{
    my $drivedesc_hash = $PVE::QemuServer::Drive::drivedesc_hash;
    $confdesc->{$_} = $drivedesc_hash->{$_} for keys %$drivedesc_hash;
}

for my $i (0 .. $MAX_USB_DEVICES - 1) {
    $confdesc->{"usb$i"} = $usbdesc;
}
1158
# Property-string format of the 'boot' option. 'legacy' is the default key and
# holds the deprecated letter list; 'order' holds the device list.
my $boot_fmt = {
    legacy => {
        type => 'string',
        pattern => '[acdn]{1,4}',
        format_description => "[acdn]{1,4}",
        default_key => 1,
        optional => 1,
        description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n)."
            . " Deprecated, use 'order=' instead.",

        # note: this is also the fallback if boot: is not given at all
        default => 'cdn',
    },
    order => {
        type => 'string',
        format => 'pve-qm-bootdev-list',
        format_description => "device[;device...]",
        optional => 1,
        description => <<EODESC,
The guest will attempt to boot from devices in the order they appear here.

Disks, optical drives and passed-through storage USB devices will be directly
booted from, NICs will load PXE, and PCIe devices will either behave like disks
(e.g. NVMe) or load an option ROM (e.g. RAID controller, hardware NIC).

Note that only devices in this list will be marked as bootable and thus loaded
by the guest firmware (BIOS/UEFI). If you require multiple disks for booting
(e.g. software-raid), you need to specify all of them here.

Overrides the deprecated 'legacy=[acdn]*' value when given.
EODESC
    },
};
PVE::JSONSchema::register_format('pve-qm-boot', $boot_fmt);
1193
PVE::JSONSchema::register_format('pve-qm-bootdev', \&verify_bootdev);

# Validator for a single boot device name ('pve-qm-bootdev' format).
#
# Accepts valid drive names (except efidisk*/tpmstate*) as well as net<N>,
# usb<N> and hostpci<N> names that have an entry in the option table.
# Returns $dev when accepted; otherwise returns nothing if $noerr is set and
# dies if it is not.
sub verify_bootdev {
    my ($dev, $noerr) = @_;

    # efidisk/tpmstate names are not accepted through the drive name check
    my $is_state_disk = $dev =~ m/^efidisk/ || $dev =~ m/^tpmstate/;
    if (PVE::QemuServer::Drive::is_valid_drivename($dev) && !$is_state_disk) {
        return $dev;
    }

    for my $base (qw(net usb hostpci)) {
        next if $dev !~ m/^$base\d+$/;
        return $dev if $confdesc->{$dev};
    }

    return if $noerr;
    die "invalid boot device '$dev'\n";
}
1215
# Render a list of boot devices as a 'boot' property string ("order=a;b;c").
# An empty list yields the empty string.
sub print_bootorder {
    my ($devs) = @_;

    if (!scalar(@$devs)) {
        return "";
    }

    my $order = join(';', @$devs);
    return PVE::JSONSchema::print_property_string({ order => $order }, $boot_fmt);
}
1222
# Cached result of the KVM_GET_API_VERSION ioctl; stays 0 until a query works.
my $kvm_api_version = 0;

# Return the KVM API version obtained from /dev/kvm. A true result is cached.
# Returns nothing when /dev/kvm cannot be opened.
sub kvm_version {
    if (!$kvm_api_version) {
        open(my $kvm_fh, '<', '/dev/kvm') or return;

        # 0xae00 => KVM_GET_API_VERSION
        $kvm_api_version = ioctl($kvm_fh, 0xae00, 0);
        close($kvm_fh);
    }

    return $kvm_api_version;
}
1236
# Per-binary caches: the detected version string and the mtime of the binary
# at the time the version was detected.
my $kvm_user_version = {};
my $kvm_mtime = {};

# Return the version of a QEMU binary as printed by '<binary> --version'.
#
# $binary defaults to the command for the host architecture. The result is
# cached per binary and reused as long as the mtime of the binary is
# unchanged. Returns 'unknown' if no version line could be matched; errors
# from running the binary are only warned about.
#
# Dies if the binary cannot be stat'ed.
sub kvm_user_version {
    my ($binary) = @_;

    $binary //= get_command_for_arch(get_host_arch()); # get the native arch by default

    # stat() yields a false value on failure; report that clearly instead of
    # failing on the ->mtime method call below.
    my $st = stat($binary) or die "unable to stat QEMU binary '$binary' - $!\n";

    my $cachedmtime = $kvm_mtime->{$binary} // -1;
    return $kvm_user_version->{$binary}
        if $kvm_user_version->{$binary} && $cachedmtime == $st->mtime;

    $kvm_user_version->{$binary} = 'unknown';
    $kvm_mtime->{$binary} = $st->mtime;

    # keep "major.minor[.patch]" and drop a possible fourth version component
    my $code = sub {
        my $line = shift;
        if ($line =~ m/^QEMU( PC)? emulator version (\d+\.\d+(\.\d+)?)(\.\d+)?[,\s]/) {
            $kvm_user_version->{$binary} = $2;
        }
    };

    eval { run_command([$binary, '--version'], outfunc => $code); };
    warn $@ if $@;

    return $kvm_user_version->{$binary};
}
# File-private wrapper around PVE::QemuServer::Machine::extract_version()
# that falls back to kvm_user_version() when no $version is passed.
my sub extract_version {
    my ($machine_type, $version) = @_;

    $version //= kvm_user_version();

    return PVE::QemuServer::Machine::extract_version($machine_type, $version);
}
1271
# True if /dev/vhost-net exists and is a character device.
sub kernel_has_vhost_net {
    my $vhost_dev = '/dev/vhost-net';
    return -c $vhost_dev;
}
1275
# True if $key has a defined entry in the VM option table.
sub option_exists {
    my ($key) = @_;

    my $entry = $confdesc->{$key};
    return defined($entry);
}
1280
# Cached path of the host CD-ROM device link, once one has been found.
my $cdrom_path;

# Return the first of /dev/cdrom, /dev/cdrom1 and /dev/cdrom2 that is a
# symbolic link; a hit is cached. If none is a link, the false result of the
# last probe is returned.
sub get_cdrom_path {
    return $cdrom_path if $cdrom_path;

    my $is_link;
    for my $candidate ("/dev/cdrom", "/dev/cdrom1", "/dev/cdrom2") {
        $is_link = -l $candidate;
        return $cdrom_path = $candidate if $is_link;
    }

    # nothing found: pass on the false value of the final -l test
    return $is_link;
}
1290
# Resolve the 'file' value of an optical drive to a path:
#   'cdrom'        -> host CD-ROM device, see get_cdrom_path()
#   'none'         -> empty string
#   absolute path  -> returned unchanged
#   anything else  -> resolved through PVE::Storage::path()
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';
    return '' if $cdrom eq 'none';
    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path($storecfg, $cdrom);
}
1304
# try to convert old style file names to volume IDs
#
# 'none', 'cdrom', /dev/... paths and values of the form 'storage:volume' are
# returned unchanged. Any other value that contains a '/' is rejected (nothing
# is returned). A plain file name is mapped to the 'local' storage: to
# 'local:iso/<file>' for cdrom media and to 'local:<vmid>/<file>' otherwise.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    return $file if $file eq 'none' || $file eq 'cdrom';
    return $file if $file =~ m|^/dev/.+| || $file =~ m/^([^:]+):(.+)$/;

    return if $file =~ m|/|;

    my $subdir = ($media && $media eq 'cdrom') ? 'iso' : $vmid;
    return "local:$subdir/$file";
}
1323
# Check that the content type of a volume ($vtype) fits the media type of the
# drive: 'disk' expects 'images', 'cdrom' expects 'iso'.
#
# Does nothing when $media is not set. Raises a parameter exception for $opt
# on a mismatch and dies with "internal error" for any other media value.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my %expected_vtype_for = (
        disk => 'images',
        cdrom => 'iso',
    );
    my $etype = $expected_vtype_for{$media} // die "internal error";

    return if ($vtype eq $etype);

    raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
}
1342
# Normalize $drive->{file} and $drive->{media} in place.
#
# A file value that is neither 'cdrom'/'none', a /dev/... path, a
# 'storage:volume' ID nor a plain number is treated as a filesystem path and
# converted to a volume ID. A parameter exception for $opt is raised when the
# path belongs to no storage. The media type defaults to 'cdrom' for 'iso'
# volumes and for the 'cdrom'/'none' keywords.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    my $file = $drive->{file};

    # try to convert filesystem paths to volume IDs
    my $is_fs_path = !(
        $file =~ m/^(cdrom|none)$/
        || $file =~ m|^/dev/.+|
        || $file =~ m/^([^:]+):(.+)$/
        || $file =~ m/^\d+$/
    );

    if ($is_fs_path) {
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $file);
        raise_param_exc({ $opt => "unable to associate path '$file' to any storage"})
            if !$vtype;
        $drive->{media} = 'cdrom' if !$drive->{media} && $vtype eq 'iso';
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    $drive->{media} = 'cdrom' if !$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/;
}
1362
# Parse the value of the 'hotplug' option into a hash of enabled features.
#
# '0' yields an empty hash and '1' stands for the default of the 'hotplug'
# option. Otherwise the value is a list of feature names; the known names are
# network, disk, cpu, memory, usb and cloudinit. Dies on any other name.
sub parse_hotplug_features {
    my ($data) = @_;

    return {} if $data eq '0';

    $data = $confdesc->{hotplug}->{default} if $data eq '1';

    my $enabled = {};
    for my $feature (PVE::Tools::split_list($data)) {
        $feature =~ m/^(network|disk|cpu|memory|usb|cloudinit)$/
            or die "invalid hotplug feature '$feature'\n";
        $enabled->{$1} = 1;
    }

    return $enabled;
}
1381
PVE::JSONSchema::register_format('pve-hotplug-features', \&pve_verify_hotplug_features);

# Validator for the 'pve-hotplug-features' format.
#
# Returns $value if parse_hotplug_features() accepts it. On failure it returns
# nothing when $noerr is set and dies otherwise, passing on the error raised
# by the parser.
sub pve_verify_hotplug_features {
    my ($value, $noerr) = @_;

    # The parser dies on invalid input, so it has to be wrapped in an eval
    # for $noerr to have any effect.
    my $parsed = eval { parse_hotplug_features($value) };
    if (my $err = $@) {
        return if $noerr;
        die $err;
    }

    return $value if $parsed;

    return if $noerr;

    die "unable to parse hotplug option\n";
}
1392
# Send a SCSI INQUIRY through the SG_IO ioctl of an open file handle.
#
# Parameters:
#   $fh    - open handle of the device to query
#   $noerr - if true, return nothing on failure instead of dying
#
# Returns a hash reference with the keys type, removable, vendor, product and
# revision, decoded from the first 36 bytes of the response.
sub scsi_inquiry {
    my($fh, $noerr) = @_;

    my $SG_IO = 0x2285;
    my $SG_GET_VERSION_NUM = 0x2282;

    my $versionbuf = "\x00" x 8;
    my $ret = ioctl($fh, $SG_GET_VERSION_NUM, $versionbuf);
    if (!$ret) {
        die "scsi ioctl SG_GET_VERSION_NUM failed - $!\n" if !$noerr;
        return;
    }
    my $version = unpack("I", $versionbuf);
    if ($version < 30000) {
        die "scsi generic interface too old\n" if !$noerr;
        return;
    }

    my $buf = "\x00" x 36;
    my $sensebuf = "\x00" x 8;
    # 6 byte command block: opcode 0x12 and a response length of 36 bytes
    my $cmd = pack("C x3 C x1", 0x12, 36);

    # see /usr/include/scsi/sg.h
    my $sg_io_hdr_t = "i i C C s I P P P I I i P C C C C S S i I I";

    # the fields after the first ten are zero-filled by pack()
    my $packet = pack(
        $sg_io_hdr_t, ord('S'), -3, length($cmd), length($sensebuf), 0, length($buf), $buf, $cmd, $sensebuf, 6000
    );

    $ret = ioctl($fh, $SG_IO, $packet);
    if (!$ret) {
        die "scsi ioctl SG_IO failed - $!\n" if !$noerr;
        return;
    }

    # fields 17 and 18 are the two 'S' entries of the header template; any
    # non-zero value there is treated as an error
    my @res = unpack($sg_io_hdr_t, $packet);
    if ($res[17] || $res[18]) {
        die "scsi ioctl SG_IO status error - $!\n" if !$noerr;
        return;
    }

    my $res = {};
    $res->@{qw(type removable vendor product revision)} = unpack("C C x6 A8 A16 A4", $buf);

    # reduce 'removable' to a 0/1 flag (bit 7) and 'type' to its low 5 bits
    $res->{removable} = $res->{removable} & 128 ? 1 : 0;
    $res->{type} &= 0x1F;

    return $res;
}
1442
# Open $path read/write and run scsi_inquiry() on it in no-error mode.
#
# Returns the inquiry result (a hash reference), or nothing if the path cannot
# be opened or the inquiry fails.
sub path_is_scsi {
    my ($path) = @_;

    # Pass the mode as a separate argument: IO::File then uses the
    # three-argument form of open, so $path is taken literally (no whitespace
    # stripping or other two-argument open magic).
    my $fh = IO::File->new($path, '+<') || return;
    my $res = scsi_inquiry($fh, 1);
    close($fh);

    return $res;
}
1452
# Build the '-device' string of the USB tablet. It is attached to the EHCI
# bus on q35 machines and on aarch64, and to the UHCI bus otherwise.
sub print_tabletdevice_full {
    my ($conf, $arch) = @_;

    my $is_q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # we use uhci for old VMs because tablet driver was buggy in older qemu
    my $usbbus = ($is_q35 || $arch eq 'aarch64') ? 'ehci' : 'uhci';

    return "usb-tablet,id=tablet,bus=$usbbus.0,port=1";
}
1468
# Build the '-device' string of the USB keyboard. Only aarch64 guests get
# one; for every other architecture nothing is returned.
sub print_keyboarddevice_full {
    my ($conf, $arch) = @_;

    if ($arch eq 'aarch64') {
        return "usb-kbd,id=keyboard,bus=ehci.0,port=2";
    }

    return;
}
1476
# Drive identifier used in device and drive IDs: interface name directly
# followed by the index, e.g. 'scsi0'.
my sub get_drive_id {
    my ($drive) = @_;

    return $drive->{interface} . $drive->{index};
}
1481
# Build the value of the QEMU '-device' argument for a guest drive.
#
# Parameters:
#   $storecfg     - storage configuration, used to resolve volume IDs to paths
#   $conf         - VM configuration hash
#   $vmid         - VM ID (not referenced in this function body)
#   $drive        - parsed drive hash (interface, index, file, media, ...)
#   $bridges      - passed through to print_pci_addr()
#   $arch         - passed through to print_pci_addr()
#   $machine_type - machine type, used for PCI addressing and version checks
#
# Returns the device string. Dies for the 'usb' interface ("implement me") and
# for unknown interface types.
sub print_drivedevice_full {
    my ($storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type) = @_;

    my $device = '';

    my $drive_id = get_drive_id($drive);
    if ($drive->{interface} eq 'virtio') {
        my $pciaddr = print_pci_addr("$drive_id", $bridges, $arch, $machine_type);
        $device = "virtio-blk-pci,drive=drive-$drive_id,id=${drive_id}${pciaddr}";
        $device .= ",iothread=iothread-$drive_id" if $drive->{iothread};
    } elsif ($drive->{interface} eq 'scsi') {

        my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = 'hd';
        my $path = '';
        if (drive_is_cdrom($drive)) {
            $devicetype = 'cd';
        } else {
            if ($drive->{file} =~ m|^/|) {
                # absolute path: probe the device itself to pick the type
                $path = $drive->{file};
                if (my $info = path_is_scsi($path)) {
                    if ($info->{type} == 0 && $drive->{scsiblock}) {
                        $devicetype = 'block';
                    } elsif ($info->{type} == 1) { # tape
                        $devicetype = 'generic';
                    }
                }
            } else {
                $path = PVE::Storage::path($storecfg, $drive->{file});
            }

            # for compatibility only, we prefer scsi-hd (#2408, #2355, #2380)
            my $version = extract_version($machine_type, kvm_user_version());
            if ($path =~ m/^iscsi\:\/\// &&
                !min_version($version, 4, 1)) {
                $devicetype = 'generic';
            }
        }

        # default, lsi* and pvscsi controllers address the drive by scsi-id,
        # all other controller types by lun
        if (!$conf->{scsihw} || $conf->{scsihw} =~ m/^lsi/ || $conf->{scsihw} eq 'pvscsi') {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,scsi-id=$unit";
        } else {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,channel=0,scsi-id=0"
                .",lun=$drive->{index}";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($drive->{ssd} && ($devicetype eq 'block' || $devicetype eq 'hd')) {
            $device .= ",rotation_rate=1";
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};

    } elsif ($drive->{interface} eq 'ide' || $drive->{interface} eq 'sata') {
        my $maxdev = ($drive->{interface} eq 'sata') ? $PVE::QemuServer::Drive::MAX_SATA_DISKS : 2;
        my $controller = int($drive->{index} / $maxdev);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = ($drive->{media} && $drive->{media} eq 'cdrom') ? "cd" : "hd";

        $device = "ide-$devicetype";
        if ($drive->{interface} eq 'ide') {
            $device .= ",bus=ide.$controller,unit=$unit";
        } else {
            $device .= ",bus=ahci$controller.$unit";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($devicetype eq 'hd') {
            if (my $model = $drive->{model}) {
                # the model is stored URI-escaped in the drive hash
                $model = URI::Escape::uri_unescape($model);
                $device .= ",model=$model";
            }
            if ($drive->{ssd}) {
                $device .= ",rotation_rate=1";
            }
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};
    } elsif ($drive->{interface} eq 'usb') {
        die "implement me";
        # -device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0
    } else {
        die "unsupported interface type";
    }

    $device .= ",bootindex=$drive->{bootindex}" if $drive->{bootindex};

    if (my $serial = $drive->{serial}) {
        # the serial is stored URI-escaped in the drive hash
        $serial = URI::Escape::uri_unescape($serial);
        $device .= ",serial=$serial";
    }

    return $device;
}
1577
# Return the value of the first 'InitiatorName = ...' line found in
# /etc/iscsi/initiatorname.iscsi. Returns nothing if the file cannot be opened
# and undef if it contains no such line.
sub get_initiator_name {
    my $fh = IO::File->new('/etc/iscsi/initiatorname.iscsi') || return;

    my $name;
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\s*InitiatorName\s*=\s*([\.\-:\w]+)/) {
            $name = $1;
            last;
        }
    }
    $fh->close();

    return $name;
}
1591
# Build the value of the QEMU '-drive' argument for a guest drive.
#
# Parameters:
#   $storecfg - storage configuration, used to resolve volume IDs
#   $vmid     - VM ID, passed on to get_iso_path()
#   $drive    - parsed drive hash (file, format, interface, index, cache, ...)
#   $pbs_name - if set, the drive is set up with format 'alloc-track' and this
#               name as its backing node
#   $io_uring - if true, 'aio=io_uring' is used unless an explicit 'aio' is
#               set or one of the storage type exceptions below applies
#
# Returns the option string. Dies if $pbs_name is given for a cdrom drive or
# if no format is known for a PBS backed drive.
sub print_drive_commandline_full {
    my ($storecfg, $vmid, $drive, $pbs_name, $io_uring) = @_;

    my $path;
    my $volid = $drive->{file};
    my $format = $drive->{format};
    my $drive_id = get_drive_id($drive);

    # $storeid stays undefined when $volid is not a storage volume ID
    my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
    my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;

    if (drive_is_cdrom($drive)) {
        $path = get_iso_path($storecfg, $vmid, $volid);
        die "$drive_id: cannot back cdrom drive with PBS snapshot\n" if $pbs_name;
    } else {
        if ($storeid) {
            $path = PVE::Storage::path($storecfg, $volid);
            $format //= qemu_img_format($scfg, $volname);
        } else {
            # not a storage volume: use the value as path, default to raw
            $path = $volid;
            $format //= "raw";
        }
    }

    my $is_rbd = $path =~ m/^rbd:/;

    # options that are passed through under their own name
    my $opts = '';
    my @qemu_drive_options = qw(heads secs cyls trans media cache rerror werror aio discard);
    foreach my $o (@qemu_drive_options) {
        $opts .= ",$o=$drive->{$o}" if defined($drive->{$o});
    }

    # snapshot only accepts on|off
    if (defined($drive->{snapshot})) {
        my $v = $drive->{snapshot} ? 'on' : 'off';
        $opts .= ",snapshot=$v";
    }

    if (defined($drive->{ro})) { # ro maps to QEMUs `readonly`, which accepts `on` or `off` only
        $opts .= ",readonly=" . ($drive->{ro} ? 'on' : 'off');
    }

    # throttling limits: one pass each for the total, read and write settings;
    # the mbps values are converted to bytes per second
    foreach my $type (['', '-total'], [_rd => '-read'], [_wr => '-write']) {
        my ($dir, $qmpname) = @$type;
        if (my $v = $drive->{"mbps$dir"}) {
            $opts .= ",throttling.bps$qmpname=".int($v*1024*1024);
        }
        if (my $v = $drive->{"mbps${dir}_max"}) {
            $opts .= ",throttling.bps$qmpname-max=".int($v*1024*1024);
        }
        if (my $v = $drive->{"bps${dir}_max_length"}) {
            $opts .= ",throttling.bps$qmpname-max-length=$v";
        }
        if (my $v = $drive->{"iops${dir}"}) {
            $opts .= ",throttling.iops$qmpname=$v";
        }
        if (my $v = $drive->{"iops${dir}_max"}) {
            $opts .= ",throttling.iops$qmpname-max=$v";
        }
        if (my $v = $drive->{"iops${dir}_max_length"}) {
            $opts .= ",throttling.iops$qmpname-max-length=$v";
        }
    }

    if ($pbs_name) {
        # with a PBS backing node the real format moves to 'file.driver'
        $format = "rbd" if $is_rbd;
        die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n"
            if !$format;
        $opts .= ",format=alloc-track,file.driver=$format";
    } elsif ($format) {
        $opts .= ",format=$format";
    }

    my $cache_direct = 0;

    if (my $cache = $drive->{cache}) {
        $cache_direct = $cache =~ /^(?:off|none|directsync)$/;
    } elsif (!drive_is_cdrom($drive) && !($scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow})) {
        # no cache mode configured: use cache=none, except for cdroms and for
        # btrfs storages that do not have 'nocow' set
        $opts .= ",cache=none";
        $cache_direct = 1;
    }

    # io_uring with cache mode writeback or writethrough on krbd will hang...
    my $rbd_no_io_uring = $scfg && $scfg->{type} eq 'rbd' && $scfg->{krbd} && !$cache_direct;

    # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
    # sometimes, just plain disable...
    my $lvm_no_io_uring = $scfg && $scfg->{type} eq 'lvm';

    # io_uring causes problems when used with CIFS since kernel 5.15
    # Some discussion: https://www.spinics.net/lists/linux-cifs/msg26734.html
    my $cifs_no_io_uring = $scfg && $scfg->{type} eq 'cifs';

    # only choose an aio mode if none was configured explicitly
    if (!$drive->{aio}) {
        if ($io_uring && !$rbd_no_io_uring && !$lvm_no_io_uring && !$cifs_no_io_uring) {
            # io_uring supports all cache modes
            $opts .= ",aio=io_uring";
        } else {
            # aio native works only with O_DIRECT
            if($cache_direct) {
                $opts .= ",aio=native";
            } else {
                $opts .= ",aio=threads";
            }
        }
    }

    if (!drive_is_cdrom($drive)) {
        my $detectzeroes;
        if (defined($drive->{detect_zeroes}) && !$drive->{detect_zeroes}) {
            $detectzeroes = 'off';
        } elsif ($drive->{discard}) {
            $detectzeroes = $drive->{discard} eq 'on' ? 'unmap' : 'on';
        } else {
            # This used to be our default with discard not being specified:
            $detectzeroes = 'on';
        }

        # note: 'detect-zeroes' works per blockdev and we want it to persist
        # after the alloc-track is removed, so put it on 'file' directly
        my $dz_param = $pbs_name ? "file.detect-zeroes" : "detect-zeroes";
        $opts .= ",$dz_param=$detectzeroes" if $detectzeroes;
    }

    if ($pbs_name) {
        $opts .= ",backing=$pbs_name";
        $opts .= ",auto-remove=on";
    }

    # my $file_param = $pbs_name ? "file.file.filename" : "file";
    my $file_param = "file";
    if ($pbs_name) {
        # non-rbd drivers require the underlying file to be a seperate block
        # node, so add a second .file indirection
        $file_param .= ".file" if !$is_rbd;
        $file_param .= ".filename";
    }
    # the file parameter is left out completely when there is no path
    my $pathinfo = $path ? "$file_param=$path," : '';

    return "${pathinfo}if=none,id=drive-$drive->{interface}$drive->{index}$opts";
}
1733
# Build the '-blockdev' option string of a read-only 'pbs' driver node.
#
# $pbs_conf supplies repository, snapshot and archive, plus the optional
# namespace and keyfile. $pbs_name becomes the node name.
sub print_pbs_blockdev {
    my ($pbs_conf, $pbs_name) = @_;

    my @parts = (
        "driver=pbs",
        "node-name=$pbs_name",
        "read-only=on",
        "repository=$pbs_conf->{repository}",
    );
    push @parts, "namespace=$pbs_conf->{namespace}" if $pbs_conf->{namespace};
    push @parts, "snapshot=$pbs_conf->{snapshot}";
    push @parts, "archive=$pbs_conf->{archive}";
    push @parts, "keyfile=$pbs_conf->{keyfile}" if $pbs_conf->{keyfile};

    return join(',', @parts);
}
1744
# Build the value of the QEMU '-device' argument for a guest NIC.
#
# The model 'virtio' is mapped to the device 'virtio-net-pci'; every other
# model name is used as the device name. Multiqueue, boot index, MTU and (with
# $use_old_bios_files) a PXE ROM file are appended as needed.
#
# Dies if a configured MTU is below 576 or above the MTU of the bridge.
sub print_netdevice_full {
    my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type) = @_;

    my $is_virtio = $net->{model} eq 'virtio';
    my $device = $is_virtio ? 'virtio-net-pci' : $net->{model};

    my $pciaddr = print_pci_addr("$netid", $bridges, $arch, $machine_type);
    my $devstr = "$device,mac=$net->{macaddr},netdev=$netid$pciaddr,id=$netid";

    if ($net->{queues} && $net->{queues} > 1 && $is_virtio) {
        # Consider we have N queues, the number of vectors needed is 2 * N + 2, i.e., one per in
        # and out of each queue plus one config interrupt and control vector queue
        my $vectors = $net->{queues} * 2 + 2;
        $devstr .= ",vectors=$vectors,mq=on";
    }

    if ($net->{bootindex}) {
        $devstr .= ",bootindex=$net->{bootindex}";
    }

    if (my $mtu = $net->{mtu}) {
        if (!($is_virtio && $net->{bridge})) {
            warn "WARN: netdev $netid: ignoring MTU '$mtu', not using VirtIO or no bridge configured.\n";
        } else {
            my $bridge_mtu = PVE::Network::read_bridge_mtu($net->{bridge});
            if ($mtu == 1) {
                # the special value 1 means: use the MTU of the bridge
                $mtu = $bridge_mtu;
            } else {
                die "netdev $netid: MTU '$mtu' is smaller than the IP minimum MTU '576'\n"
                    if $mtu < 576;
                die "netdev $netid: MTU '$mtu' is bigger than the bridge MTU '$bridge_mtu'\n"
                    if $mtu > $bridge_mtu;
            }
            $devstr .= ",host_mtu=$mtu";
        }
    }

    if ($use_old_bios_files) {
        my %pxe_rom_for = (
            'virtio-net-pci' => 'pxe-virtio.rom',
            'e1000' => 'pxe-e1000.rom',
            'e1000e' => 'pxe-e1000e.rom',
            'ne2k' => 'pxe-ne2k_pci.rom',
            'pcnet' => 'pxe-pcnet.rom',
            'rtl8139' => 'pxe-rtl8139.rom',
        );
        my $romfile = $pxe_rom_for{$device};
        $devstr .= ",romfile=$romfile" if $romfile;
    }

    return $devstr;
}
1799
# Build the value of the QEMU '-netdev' argument for a guest NIC.
#
# With a bridge configured this is a tap backend using the pve-bridge scripts
# (the hotplug variant of the up script if $hotplug is set); without a bridge
# it is a 'user' backend.
#
# Dies if $netid is not of the form 'net<N>' with N below $MAX_NETS, or if the
# resulting tap interface name is too long.
sub print_netdev_full {
    my ($vmid, $conf, $arch, $net, $netid, $hotplug) = @_;

    # An id that does not match must be rejected here; comparing an empty
    # index against $MAX_NETS would let it pass.
    my ($i) = $netid =~ m/^net(\d+)$/
        or die "got strange net id '$netid'\n";
    $i = int($i);

    die "got strange net id '$i'\n" if $i >= ${MAX_NETS};

    my $ifname = "tap${vmid}i$i";

    # kvm uses TUNSETIFF ioctl, and that limits ifname length
    die "interface name '$ifname' is too long (max 15 character)\n"
        if length($ifname) >= 16;

    my $vhostparam = '';
    if (is_native($arch)) {
        $vhostparam = ',vhost=on' if kernel_has_vhost_net() && $net->{model} eq 'virtio';
    }

    my $vmname = $conf->{name} || "vm$vmid";

    my $netdev = "";
    my $script = $hotplug ? "pve-bridge-hotplug" : "pve-bridge";

    if ($net->{bridge}) {
        $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/$script"
            .",downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam";
    } else {
        $netdev = "type=user,id=$netid,hostname=$vmname";
    }

    $netdev .= ",queues=$net->{queues}" if ($net->{queues} && $net->{model} eq 'virtio');

    return $netdev;
}
1837
# Maps the configured display type to the QEMU device name.
my $vga_map = {
    'cirrus' => 'cirrus-vga',
    'std' => 'VGA',
    'vmware' => 'vmware-svga',
    'virtio' => 'virtio-vga',
    'virtio-gl' => 'virtio-vga-gl',
};

# Build the value of the QEMU '-device' argument for a display device.
#
# Parameters:
#   $conf            - VM configuration hash (ostype, bios, machine type)
#   $vga             - parsed vga option (type, memory)
#   $arch            - guest architecture
#   $machine_version - machine version, checked with min_version()
#   $machine         - machine type, passed on for PCI addressing
#   $id              - display index; appended to the 'vga' device id if defined
#   $qxlnum          - if true, a qxl device is created ('qxl-vga' when $id is
#                      false, 'qxl' otherwise)
#   $bridges         - passed through to the PCI address helpers
#
# Dies if the display type has no device mapping, or if the libraries or the
# DRM render node needed for 'virtio-gl' are missing.
sub print_vga_device {
    my ($conf, $vga, $arch, $machine_version, $machine, $id, $qxlnum, $bridges) = @_;

    my $type = $vga_map->{$vga->{type}};
    if ($arch eq 'aarch64' && defined($type) && $type eq 'virtio-vga') {
        $type = 'virtio-gpu';
    }
    my $vgamem_mb = $vga->{memory};

    my $max_outputs = '';
    if ($qxlnum) {
        $type = $id ? 'qxl' : 'qxl-vga';

        # Both alternatives have to sit inside one anchored group, otherwise
        # '^' only applies to the first and '$' only to the second one.
        if (!$conf->{ostype} || $conf->{ostype} =~ m/^(?:l\d\d|other)$/) {
            # set max outputs so linux can have up to 4 qxl displays with one device
            if (min_version($machine_version, 4, 1)) {
                $max_outputs = ",max_outputs=4";
            }
        }
    }

    die "no devicetype for $vga->{type}\n" if !$type;

    my $memory = "";
    if ($vgamem_mb) {
        if ($vga->{type} =~ /^virtio/) {
            my $bytes = PVE::Tools::convert_size($vgamem_mb, "mb" => "b");
            $memory = ",max_hostmem=$bytes";
        } elsif ($qxlnum) {
            # from https://www.spice-space.org/multiple-monitors.html
            $memory = ",vgamem_mb=$vga->{memory}";
            my $ram = $vgamem_mb * 4;
            my $vram = $vgamem_mb * 2;
            $memory .= ",ram_size_mb=$ram,vram_size_mb=$vram";
        } else {
            $memory = ",vgamem_mb=$vga->{memory}";
        }
    } elsif ($qxlnum && $id) {
        $memory = ",ram_size=67108864,vram_size=33554432";
    }

    # EDID is switched off for the 'VGA' device on Windows guests that do not
    # use OVMF
    my $edidoff = "";
    if ($type eq 'VGA' && windows_version($conf->{ostype})) {
        $edidoff=",edid=off" if (!defined($conf->{bios}) || $conf->{bios} ne 'ovmf');
    }

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $vgaid = "vga" . ($id // '');
    my $pciaddr;
    if ($q35 && $vgaid eq 'vga') {
        # the first display uses pcie.0 bus on q35 machines
        $pciaddr = print_pcie_addr($vgaid, $bridges, $arch, $machine);
    } else {
        $pciaddr = print_pci_addr($vgaid, $bridges, $arch, $machine);
    }

    if ($vga->{type} eq 'virtio-gl') {
        my $base = '/usr/lib/x86_64-linux-gnu/lib';
        die "missing libraries for '$vga->{type}' detected! Please install 'libgl1' and 'libegl1'\n"
            if !-e "${base}EGL.so.1" || !-e "${base}GL.so.1";

        die "no DRM render node detected (/dev/dri/renderD*), no GPU? - needed for '$vga->{type}' display\n"
            if !PVE::Tools::dir_glob_regex('/dev/dri/', "renderD.*");
    }

    return "$type,id=${vgaid}${memory}${max_outputs}${pciaddr}${edidoff}";
}
1913
# Parse a ';' separated list of numbers and ranges, e.g. "0-3;5".
#
# Returns an array reference with one [ first, last ] pair per list entry;
# 'last' is undef for a single number. Dies on a malformed entry and on a
# range whose end is smaller than its start.
sub parse_number_sets {
    my ($set) = @_;

    my @ranges;
    for my $part (split(/;/, $set)) {
        my ($first, $last) = $part =~ /^\s*(\d+)(?:-(\d+))?\s*$/
            or die "invalid range: $part\n";

        die "invalid range: $part ($last < $first)\n" if defined($last) && $last < $first;

        push @ranges, [ $first, $last ];
    }

    return \@ranges;
}
1927
# Parse a 'numa<N>' property string. The 'cpus' and 'hostnodes' values, when
# present, are replaced by their parsed form, see parse_number_sets().
sub parse_numa {
    my ($data) = @_;

    my $res = parse_property_string($numa_fmt, $data);

    if (defined($res->{cpus})) {
        $res->{cpus} = parse_number_sets($res->{cpus});
    }
    if (defined($res->{hostnodes})) {
        $res->{hostnodes} = parse_number_sets($res->{hostnodes});
    }

    return $res;
}
1936
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
#
# Parse a 'net<N>' property string into a hash. A parse error is only warned
# about and nothing is returned. If no MAC address is set, a random one is
# generated using the 'mac_prefix' from datacenter.cfg.
sub parse_net {
    my ($data) = @_;

    my $net = eval { parse_property_string($net_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    return $net if defined($net->{macaddr});

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
    $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix});

    return $net;
}
1952
# ipconfigX ip=cidr,gw=ip,ip6=cidr,gw6=ip
#
# Parse an 'ipconfig<N>' property string into a hash. Parse errors and
# inconsistent combinations (a gateway without an address of the same family,
# or a gateway together with dhcp/auto) are only warned about and nothing is
# returned. If neither an IPv4 nor an IPv6 address is set, both default to
# 'dhcp'.
sub parse_ipconfig {
    my ($data) = @_;

    my $res = eval { parse_property_string($ipconfig_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    # warn about a rejected combination; callers get nothing back
    my $reject = sub {
        my ($msg) = @_;
        warn $msg;
        return;
    };

    return $reject->('gateway specified without specifying an IP address')
        if $res->{gw} && !$res->{ip};

    return $reject->('IPv6 gateway specified without specifying an IPv6 address')
        if $res->{gw6} && !$res->{ip6};

    return $reject->('gateway specified together with DHCP')
        if $res->{gw} && $res->{ip} eq 'dhcp';

    # gw6 + auto/dhcp
    return $reject->("IPv6 gateway specified together with $res->{ip6} address")
        if $res->{gw6} && $res->{ip6} !~ /^$IPV6RE/;

    if (!$res->{ip} && !$res->{ip6}) {
        return { ip => 'dhcp', ip6 => 'dhcp' };
    }

    return $res;
}
1987
# Render a parsed network device hash as a property string, using $net_fmt.
sub print_net {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $net_fmt);
}
1993
# Re-render every 'net<N>' entry of $settings in place. parse_net() adds a
# random MAC address where none is set. Entries that fail to parse are left
# untouched.
sub add_random_macs {
    my ($settings) = @_;

    for my $opt (grep { m/^net(\d+)$/ } keys %$settings) {
        my $net = parse_net($settings->{$opt});
        if ($net) {
            $settings->{$opt} = print_net($net);
        }
    }
}
2004
# Return 1 if the storage layer reports $vmid as the owner of $volid.
# Absolute paths are never owned; errors from the storage lookup are ignored
# and count as "not owned". Returns nothing in all other cases.
sub vm_is_volid_owner {
    my ($storecfg, $vmid, $volid) = @_;

    return if $volid =~ m|^/|;

    # only the owner (second return value) is of interest here
    my (undef, $owner) = eval { PVE::Storage::path($storecfg, $volid) };

    return 1 if $owner && ($owner == $vmid);

    return;
}
2018
# Handle a drive that is no longer referenced by the configuration.
#
# A cloud-init drive is freed right away (errors are only warned about) and
# the 'cloudinit' key is removed from $conf. Any other non-cdrom volume is
# added to $conf as an unused volume, provided it is owned by this VM.
sub vmconfig_register_unused_drive {
    my ($storecfg, $vmid, $conf, $drive) = @_;

    if (drive_is_cloudinit($drive)) {
        eval { PVE::Storage::vdisk_free($storecfg, $drive->{file}) };
        if (my $err = $@) {
            warn $err;
        }
        delete $conf->{cloudinit};
    } elsif (!drive_is_cdrom($drive)) {
        my $volid = $drive->{file};
        PVE::QemuConfig->add_unused_volume($conf, $volid, $vmid)
            if vm_is_volid_owner($storecfg, $vmid, $volid);
    }
}
2033
# smbios: [manufacturer=str][,product=str][,version=str][,serial=str][,uuid=uuid][,sku=str][,family=str][,base64=bool]
#
# Property-string schema for the 'smbios1' option. All properties are
# optional. Apart from 'uuid' and the 'base64' flag, every string property
# shares the same pattern, which only admits characters of the base64
# alphabet followed by up to two '=' padding characters.
my $smbios1_fmt = {
    # 8-4-4-4-12 hexadecimal UUID
    uuid => {
        type => 'string',
        pattern => '[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}',
        format_description => 'UUID',
        description => "Set SMBIOS1 UUID.",
        optional => 1,
    },
    version => {
        type => 'string',
        pattern => '[A-Za-z0-9+\/]+={0,2}',
        format_description => 'Base64 encoded string',
        description => "Set SMBIOS1 version.",
        optional => 1,
    },
    serial => {
        type => 'string',
        pattern => '[A-Za-z0-9+\/]+={0,2}',
        format_description => 'Base64 encoded string',
        description => "Set SMBIOS1 serial number.",
        optional => 1,
    },
    manufacturer => {
        type => 'string',
        pattern => '[A-Za-z0-9+\/]+={0,2}',
        format_description => 'Base64 encoded string',
        description => "Set SMBIOS1 manufacturer.",
        optional => 1,
    },
    product => {
        type => 'string',
        pattern => '[A-Za-z0-9+\/]+={0,2}',
        format_description => 'Base64 encoded string',
        description => "Set SMBIOS1 product ID.",
        optional => 1,
    },
    sku => {
        type => 'string',
        pattern => '[A-Za-z0-9+\/]+={0,2}',
        format_description => 'Base64 encoded string',
        description => "Set SMBIOS1 SKU string.",
        optional => 1,
    },
    family => {
        type => 'string',
        pattern => '[A-Za-z0-9+\/]+={0,2}',
        format_description => 'Base64 encoded string',
        description => "Set SMBIOS1 family string.",
        optional => 1,
    },
    # marks the string values above as base64 encoded
    base64 => {
        type => 'boolean',
        description => 'Flag to indicate that the SMBIOS values are base64 encoded',
        optional => 1,
    },
};
2091
# Parse an 'smbios1' property string against $smbios1_fmt.
# A parse error is downgraded to a warning, in which case undef is returned.
sub parse_smbios1 {
    my ($data) = @_;

    my $parsed = eval { parse_property_string($smbios1_fmt, $data) };
    if (my $err = $@) {
        warn $err;
    }

    return $parsed;
}
2099
# Serialize a parsed smbios1 hash back into its property string form.
sub print_smbios1 {
    my ($smbios1) = @_;

    my $printed = PVE::JSONSchema::print_property_string($smbios1, $smbios1_fmt);
    return $printed;
}
2104
# make the smbios1 property-string schema available under the format name 'pve-qm-smbios1'
PVE::JSONSchema::register_format('pve-qm-smbios1', $smbios1_fmt);
2106
# Parse a 'watchdog' property string against $watchdog_fmt.
# Returns nothing for an empty/false value; a parse error only warns and
# yields undef.
sub parse_watchdog {
    my ($value) = @_;

    return unless $value;

    my $watchdog = eval { parse_property_string($watchdog_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $watchdog;
}
2116
# Parse the 'agent' option of a VM config against $agent_fmt.
# Always returns a hash reference: the parsed properties when the agent is
# enabled, an empty hash when the option is missing, disabled or unparsable
# (a parse error additionally emits a warning).
sub parse_guest_agent {
    my ($conf) = @_;

    my $raw = $conf->{agent};
    return {} unless defined($raw);

    my $agent = eval { parse_property_string($agent_fmt, $raw) };
    warn $@ if $@;

    # if the agent is disabled ignore the other potentially set properties
    return $agent->{enabled} ? $agent : {};
}
2129
# Look up a single property ($key) of the parsed guest agent configuration.
# Returns undef when the config has no 'agent' option at all.
sub get_qga_key {
    my ($conf, $key) = @_;

    if (defined($conf->{agent})) {
        my $agent = parse_guest_agent($conf);
        return $agent->{$key};
    }

    return undef;
}
2137
# Parse a 'vga' property string against $vga_fmt.
# An empty/false value yields an empty hash; a parse error only warns and
# yields undef.
sub parse_vga {
    my ($value) = @_;

    return {} unless $value;

    my $vga = eval { parse_property_string($vga_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $vga;
}
2146
# Parse an 'rng' property string against $rng_fmt.
# Returns nothing for an empty/false value; a parse error only warns and
# yields undef.
sub parse_rng {
    my ($value) = @_;

    return unless $value;

    my $rng = eval { parse_property_string($rng_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $rng;
}
2156
# Parse a 'meta' property string against $meta_info_fmt.
# Returns nothing for an empty/false value; a parse error only warns and
# yields undef.
sub parse_meta_info {
    my ($value) = @_;

    return unless $value;

    my $meta = eval { parse_property_string($meta_info_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $meta;
}
2166
# Build a fresh 'meta' property string recording the QEMU version reported by
# kvm_user_version() and the current time (integer seconds) as creation time.
# Takes no arguments: for now no value can be overridden by the caller.
sub new_meta_info_string {
    my $meta = {
        'creation-qemu' => kvm_user_version(),
        ctime => "". int(time()),
    };

    return PVE::JSONSchema::print_property_string($meta, $meta_info_fmt);
}
2178
# Return extra QEMU command line arguments (array ref) needed to keep guest
# visible hardware stable for VMs that were created before QEMU 6.1 but always
# run with the latest machine version; returns nothing if no fixup applies.
#
# Such a machine version transition can affect the hardware in a way that
# would, e.g., require an OS config change (machine versions are not pinned
# for non-windows OS types).
sub qemu_created_version_fixups {
    my ($conf, $forcemachine, $kvmver) = @_;

    my $meta = parse_meta_info($conf->{meta}) // {};
    my $forced_vers = PVE::QemuServer::Machine::extract_version($forcemachine);

    # only non-versioned machine types are affected
    my $machine = $conf->{machine};
    return if defined($machine) && $machine !~ m/^(?:pc|q35|virt)$/;

    # only VMs created before 6.1 (or with unknown creation version)
    my $created_with = $meta->{'creation-qemu'};
    return if defined($created_with) && min_version($created_with, 6, 1);

    # handle snapshot-rollback/migrations
    return if $forced_vers && !min_version($forced_vers, 6, 1);

    # only need to apply the change since 6.1
    return if !min_version($kvmver, 6, 1);

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    return if !$q35;
    return if !$conf->{ostype} || $conf->{ostype} ne 'l26';

    # this changed to default-on in Q 6.1 for q35 machines, it will mess with PCI slot view
    # and thus with the predictable interface naming of systemd
    return ['-global', 'ICH9-LPC.acpi-pci-hotplug-with-bridge-support=off'];
}
2203
PVE::JSONSchema::register_format('pve-qm-usb-device', \&verify_usb_device);
# Format verifier for 'pve-qm-usb-device': returns $value unchanged when
# parse_usb_device() accepts it. Otherwise returns nothing if $noerr is set,
# or dies.
sub verify_usb_device {
    my ($value, $noerr) = @_;

    if (!parse_usb_device($value)) {
        return if $noerr;
        die "unable to parse usb device\n";
    }

    return $value;
}
2214
# add JSON properties for create and set function
#
# Copies every entry of $confdesc into $prop (which is also returned), except
# for the internal-only options listed below. With $with_disk_alloc set, drive
# options take their schema from the drive description variant that supports
# allocation instead.
sub json_config_properties {
    my ($prop, $with_disk_alloc) = @_;

    my %internal_only = map { $_ => 1 } qw(
        parent
        snaptime
        vmstate
        runningmachine
        runningcpu
        meta
    );

    for my $opt (grep { !$internal_only{$_} } keys %$confdesc) {
        my $use_alloc_schema = $with_disk_alloc && is_valid_drivename($opt);
        $prop->{$opt} = $use_alloc_schema
            ? $PVE::QemuServer::Drive::drivedesc_hash_with_alloc->{$opt}
            : $confdesc->{$opt};
    }

    return $prop;
}
2240
# Properties that we can read from an OVF file
#
# Returns a fresh schema hash with one optional disk-image property per valid
# drive name plus the optional 'cores', 'memory' and 'name' properties.
sub json_ovf_properties {
    my %prop = map {
        (
            $_ => {
                type => 'string',
                format => 'pve-volume-id-or-absolute-path',
                description => "Disk image that gets imported to $_",
                optional => 1,
            }
        )
    } PVE::QemuServer::Drive::valid_drive_names();

    $prop{cores} = {
        type => 'integer',
        description => "The number of CPU cores.",
        optional => 1,
    };
    $prop{memory} = {
        type => 'integer',
        description => "Amount of RAM for the VM in MB.",
        optional => 1,
    };
    $prop{name} = {
        type => 'string',
        description => "Name of the VM.",
        optional => 1,
    };

    return \%prop;
}
2272
# return copy of $confdesc_cloudinit to generate documentation
# (a deep copy, so callers cannot modify the shared schema)
sub cloudinit_config_properties {
    my $copy = dclone($confdesc_cloudinit);
    return $copy;
}
2278
# Validate $value against the type declared for option $key in $confdesc and
# return the normalized value:
#  - boolean: 1 or 0 (accepts 1/on/yes/true and 0/off/no/false)
#  - integer: the value as an integer (digits only)
#  - number:  the value itself (digits with optional fractional part)
#  - string:  the value itself; it is run through the format check when the
#             option declares a format, otherwise surrounding double quotes
#             are stripped
# Dies on unknown options, undefined values, embedded line breaks and type
# mismatches.
sub check_type {
    my ($key, $value) = @_;

    my $desc = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$desc;

    my $type = $desc->{type};

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    if ($type eq 'string') {
        my $fmt = $desc->{format};
        if ($fmt) {
            PVE::JSONSchema::check_format($fmt, $value);
            return $value;
        }
        $value =~ s/^\"(.*)\"$/$1/;
        return $value;
    }

    die "internal error"
}
2315
# Destroy a VM: free all volumes owned by it and remove (or replace) its
# configuration.
#
#  $skiplock            - skip the config lock check
#  $replacement_conf    - if defined, written as the new config instead of
#                         destroying the config file
#  $purge_unreferenced  - additionally free every 'images' volume the storage
#                         layer lists for this VMID
#
# Dies if the VM is a template whose base volumes are still in use. Failures
# to free individual volumes are only warned about.
sub destroy_vm {
    my ($storecfg, $vmid, $skiplock, $replacement_conf, $purge_unreferenced) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    PVE::QemuConfig->check_lock($conf) unless $skiplock;

    if ($conf->{template}) {
        # check if any base image is still used by a linked clone
        my $assert_base_unused = sub {
            my ($ds, $drive) = @_;
            return if drive_is_cdrom($drive);

            my $volid = $drive->{file};
            return if !$volid || $volid =~ m|^/|;

            die "base volume '$volid' is still in use by linked cloned\n"
                if PVE::Storage::volume_is_base_and_used($storecfg, $volid);
        };
        PVE::QemuConfig->foreach_volume_full($conf, { include_unused => 1 }, $assert_base_unused);
    }

    # volumes already handled, so that each one is freed at most once
    my %handled_volid;
    my $remove_owned_drive = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;
        return if $handled_volid{$volid};

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return unless $path && $owner && ($owner == $vmid);

        $handled_volid{$volid} = 1;
        eval { PVE::Storage::vdisk_free($storecfg, $volid) };
        warn "Could not remove disk '$volid', check manually: $@" if $@;
    };

    # only remove disks owned by this VM (referenced in the config)
    my $include_opts = {
        include_unused => 1,
        extra_keys => ['vmstate'],
    };
    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $remove_owned_drive);

    # saved VM state volumes referenced by snapshots
    for my $snap (values %{$conf->{snapshots}}) {
        my $vmstate = $snap->{vmstate};
        next if !defined($vmstate);
        my $drive = PVE::QemuConfig->parse_volume('vmstate', $vmstate, 1);
        next if !defined($drive);
        $remove_owned_drive->('vmstate', $drive);
    }

    PVE::QemuConfig->foreach_volume_full($conf->{pending}, $include_opts, $remove_owned_drive);

    if ($purge_unreferenced) { # also remove unreferenced disk
        my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid, undef, 'images');
        my $free_volume = sub {
            my ($volid, $sid, $volname, $d) = @_;
            eval { PVE::Storage::vdisk_free($storecfg, $volid) };
            warn $@ if $@;
        };
        PVE::Storage::foreach_volid($vmdisks, $free_volume);
    }

    if (defined $replacement_conf) {
        PVE::QemuConfig->write_config($vmid, $replacement_conf);
    } else {
        PVE::QemuConfig->destroy_config($vmid);
    }
}
2386
# Parse the raw text of a VM configuration file into a nested hash.
#
#  $filename - path of the config file; must end in /qemu-server/<vmid>.conf,
#              the VMID is only used in error messages
#  $raw      - file content; returns nothing if undefined
#  $strict   - if true, parse errors die, otherwise they are only warned about
#
# The result holds the main section's options at the top level, plus
#   digest    - SHA1 hex digest of $raw
#   pending   - options of the [PENDING] section
#   cloudinit - options of the [special:cloudinit] section
#   snapshots - one hash per [<snapname>] section
#
# Lines starting with '#' and 'description:' lines are collected and stored
# as the 'description' of the section they appear in. The collected text is
# flushed whenever a new section starts and at the end of the input.
sub parse_vm_config {
    my ($filename, $raw, $strict) = @_;

    return if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
        pending => {},
        cloudinit => {},
    };

    my $handle_error = sub {
        my ($msg) = @_;

        if ($strict) {
            die $msg;
        } else {
            warn $msg;
        }
    };

    $filename =~ m|/qemu-server/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    my $conf = $res;
    my $descr;
    my $section = '';

    # Store the description collected so far in the section currently being
    # parsed and start over. This must run *before* $conf is switched to a new
    # section, otherwise the text would be lost or attributed to the wrong
    # section.
    my $finish_description = sub {
        if (defined($descr)) {
            $descr =~ s/\s+$//;
            $conf->{description} = $descr;
        }
        $descr = undef;
    };

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        if ($line =~ m/^\[PENDING\]\s*$/i) {
            $section = 'pending';
            $finish_description->();
            $conf = $res->{$section} = {};
            next;
        } elsif ($line =~ m/^\[special:cloudinit\]\s*$/i) {
            $section = 'cloudinit';
            # flush here too: without it the description of the preceding
            # section was dropped whenever this section directly followed it
            $finish_description->();
            $conf = $res->{$section} = {};
            next;

        } elsif ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            # any other section header starts a snapshot section
            $section = $1;
            $finish_description->();
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/snapstate:\s*(prepare|delete)\s*$/) {
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            # 'args' is stored verbatim, without any type check
            my $key = $1;
            my $value = $2;
            $conf->{$key} = $value;
        } elsif ($line =~ m/^delete:\s*(.*\S)\s*$/) {
            my $value = $1;
            if ($section eq 'pending') {
                $conf->{delete} = $value; # we parse this later
            } else {
                $handle_error->("vm $vmid - property 'delete' is only allowed in [PENDING]\n");
            }
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(.+?)\s*$/) {
            my $key = $1;
            my $value = $2;
            eval { $value = check_type($key, $value); };
            if ($@) {
                $handle_error->("vm $vmid - unable to parse value of '$key' - $@");
            } else {
                # the legacy 'cdrom' option is an alias for 'ide2'
                $key = 'ide2' if $key eq 'cdrom';
                my $fmt = $confdesc->{$key}->{format};
                if ($fmt && $fmt =~ /^pve-qm-(?:ide|scsi|virtio|sata)$/) {
                    # normalize the drive's file reference to a volume ID
                    my $v = parse_drive($key, $value);
                    if (my $volid = filename_to_volume_id($vmid, $v->{file}, $v->{media})) {
                        $v->{file} = $volid;
                        $value = print_drive($v);
                    } else {
                        $handle_error->("vm $vmid - unable to parse value of '$key'\n");
                        next;
                    }
                }

                $conf->{$key} = $value;
            }
        } else {
            $handle_error->("vm $vmid - unable to parse config: $line\n");
        }
    }

    # description of the last section in the file
    $finish_description->();
    delete $res->{snapstate}; # just to be sure

    return $res;
}
2505
# Validate and normalize the nested config hash $conf (in place) and return
# its textual representation: main section first, then [PENDING],
# [special:cloudinit] and one section per snapshot (sorted by name).
#
# Dies if a value fails its type check, if 'cdrom' and 'ide2' are both set,
# if 'delete' is used outside of the pending section, or if a snapshot is
# named 'pending'.
sub write_vm_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    # the legacy 'cdrom' option is stored as 'ide2'
    if ($conf->{cdrom}) {
        die "option ide2 conflicts with cdrom\n" if $conf->{ide2};
        $conf->{ide2} = delete $conf->{cdrom};
    }

    # we do not use 'smp' any longer
    if ($conf->{sockets}) {
        delete $conf->{smp};
    } elsif ($conf->{smp}) {
        $conf->{sockets} = delete $conf->{smp};
        delete $conf->{cores};
    }

    # volumes referenced by a drive outside of snapshot sections
    my $used_volids = {};

    # keys that are not plain, type-checked config options
    my %structural_key = map { $_ => 1 } qw(
        digest
        description
        snapshots
        snapstate
        pending
        cloudinit
    );

    my $cleanup_config = sub {
        my ($cref, $pending, $snapname) = @_;

        for my $key (keys %$cref) {
            next if $structural_key{$key};

            if ($key eq 'delete') {
                die "propertry 'delete' is only allowed in [PENDING]\n"
                    if !$pending;
                # fixme: check syntax?
                next;
            }

            my $value = eval { check_type($key, $cref->{$key}) };
            die "unable to parse value of '$key' - $@" if $@;

            $cref->{$key} = $value;

            next if $snapname || !is_valid_drivename($key);

            my $drive = parse_drive($key, $value);
            $used_volids->{$drive->{file}} = 1 if $drive && $drive->{file};
        }
    };

    $cleanup_config->($conf);

    $cleanup_config->($conf->{pending}, 1);

    $cleanup_config->($conf->{cloudinit});

    for my $snapname (keys %{$conf->{snapshots}}) {
        die "internal error: snapshot name '$snapname' is forbidden" if lc($snapname) eq 'pending';
        $cleanup_config->($conf->{snapshots}->{$snapname}, undef, $snapname);
    }

    # remove 'unusedX' settings if we re-add a volume
    for my $key (grep { m/^unused/ } keys %$conf) {
        delete $conf->{$key} if $used_volids->{ $conf->{$key} };
    }

    # render a single section: description as leading comment lines, then
    # all options sorted by key
    my $generate_raw_config = sub {
        my ($section, $pending) = @_;

        my @chunks;

        # add description as comment to top of file
        my $descr = $section->{description};
        if (defined($descr)) {
            if ($descr) {
                for my $cl (split(/\n/, $descr)) {
                    push @chunks, '#' . PVE::Tools::encode_text($cl) . "\n";
                }
            } elsif ($pending) {
                push @chunks, "#\n";
            }
        }

        for my $key (sort keys %$section) {
            next if $key =~ /^(digest|description|pending|cloudinit|snapshots)$/;
            push @chunks, "$key: $section->{$key}\n";
        }

        return join('', @chunks);
    };

    my $raw = $generate_raw_config->($conf);

    if (scalar(keys %{$conf->{pending}})) {
        $raw .= "\n[PENDING]\n";
        $raw .= $generate_raw_config->($conf->{pending}, 1);
    }

    if (scalar(keys %{$conf->{cloudinit}})) {
        $raw .= "\n[special:cloudinit]\n";
        $raw .= $generate_raw_config->($conf->{cloudinit});
    }

    for my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n";
        $raw .= $generate_raw_config->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
2614
# Collect the static default values declared in the JSON schema ($confdesc).
# Returns a hash reference mapping option name to default; options without a
# defined default are omitted.
sub load_defaults {
    my %defaults;

    for my $key (keys %$confdesc) {
        my $default = $confdesc->{$key}->{default};
        $defaults{$key} = $default if defined($default);
    }

    return \%defaults;
}
2628
# Return a hash reference with an entry { exists => 1 } for every VMID from
# the cluster VM list that is of type 'qemu' and located on the local node.
sub config_list {
    my $vmlist = PVE::Cluster::get_vmlist();

    my $local_vms = {};
    return $local_vms if !$vmlist || !$vmlist->{ids};

    my $guests = $vmlist->{ids};
    my $local_node = nodename();

    for my $vmid (keys %$guests) {
        my $entry = $guests->{$vmid};

        my $on_this_node = $entry->{node} && $entry->{node} eq $local_node;
        next if !$on_this_node;

        my $is_qemu = $entry->{type} && $entry->{type} eq 'qemu';
        next if !$is_qemu;

        $local_vms->{$vmid}->{exists} = 1;
    }

    return $local_vms;
}
2644
# test if VM uses local resources (to prevent migration)
#
# Returns an array reference with the names of all config keys that refer to
# local resources. Dies if any were found, unless $noerr is set.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    # 'hostusb' and 'hostpci' without index are the old syntax
    my @loc_res = grep { $conf->{$_} } qw(hostusb hostpci ivshmem);

    for my $k (keys %$conf) {
        # USB devices redirected via spice are not bound to this host
        next if $k =~ m/^usb/ && ($conf->{$k} =~ m/^spice(?![^,])/);
        # sockets are safe: they will be recreated on the target side post-migrate
        next if $k =~ m/^serial/ && ($conf->{$k} eq 'socket');

        push @loc_res, $k if $k =~ m/^(usb|hostpci|serial|parallel)\d+$/;
    }

    if (scalar(@loc_res) && !$noerr) {
        die "VM uses local resources\n";
    }

    return \@loc_res;
}
2667
# check if used storages are available on all nodes (used by migrate)
#
# For every storage-backed volume of $conf, verify that its storage is enabled
# both locally and on $node, and that the storage is configured for the
# volume's content type. Dies on the first violation.
sub check_storage_availability {
    my ($storecfg, $conf, $node) = @_;

    my $check_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file} or return;

        # volumes without a storage ID are skipped
        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        # check if storage is available on both nodes
        my $scfg = PVE::Storage::storage_check_enabled($storecfg, $sid);
        PVE::Storage::storage_check_enabled($storecfg, $sid, $node);

        my ($vtype) = PVE::Storage::parse_volname($storecfg, $volid);

        die "$volid: content type '$vtype' is not available on storage '$sid'\n"
            if !$scfg->{content}->{$vtype};
    };

    PVE::QemuConfig->foreach_volume($conf, $check_volume);
}
2691
# list nodes where all VM images are available (used by has_feature API)
#
# Starts with all cluster nodes and removes, per storage-backed volume:
#  - every node, if the storage is disabled
#  - nodes not listed, if the storage is restricted to certain nodes
#  - every node but the local one, if the storage is not shared
# Returns a hash reference { <node> => 1 } of the remaining nodes.
sub shared_nodes {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => 1 } @$nodelist };
    my $nodename = nodename();

    my $restrict_by_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file} or return;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        if ($scfg->{disable}) {
            $nodehash = {};
        } elsif (my $avail = $scfg->{nodes}) {
            delete @$nodehash{ grep { !$avail->{$_} } keys %$nodehash };
        } elsif (!$scfg->{shared}) {
            delete @$nodehash{ grep { $_ ne $nodename } keys %$nodehash };
        }
    };

    PVE::QemuConfig->foreach_volume($conf, $restrict_by_volume);

    return $nodehash
}
2725
# For every cluster node, determine which storages used by the VM's volumes
# are not available there (storage disabled, or node not in the storage's
# node list).
#
# Returns a hash reference keyed by node name; a node with unavailable
# storages gets a sorted list of their IDs under 'unavailable_storages',
# all other nodes map to an empty hash.
sub check_local_storage_availability {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => {} } @$nodelist };

    my $record_unavailable = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file} or return;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        my @affected_nodes;
        if ($scfg->{disable}) {
            @affected_nodes = keys %$nodehash;
        } elsif (my $avail = $scfg->{nodes}) {
            @affected_nodes = grep { !$avail->{$_} } keys %$nodehash;
        }

        for my $node (@affected_nodes) {
            $nodehash->{$node}->{unavailable_storages}->{$storeid} = 1;
        }
    };

    PVE::QemuConfig->foreach_volume($conf, $record_unavailable);

    # turn the collected sets into sorted lists
    for my $node (values %$nodehash) {
        my $unavail = $node->{unavailable_storages} or next;
        $node->{unavailable_storages} = [ sort keys %$unavail ];
    }

    return $nodehash
}
2764
# Compat only, use assert_config_exists_on_node and vm_running_locally where possible
#
# Unless $nocheck is set, first assert that the config of $vmid exists on
# $node; then return the result of the local running check for $vmid.
sub check_running {
    my ($vmid, $nocheck, $node) = @_;

    if (!$nocheck) {
        PVE::QemuConfig::assert_config_exists_on_node($vmid, $node);
    }

    return PVE::QemuServer::Helpers::vm_running_locally($vmid);
}
2772
# Return the list of local VMs (see config_list) with the 'pid' of each VM
# that is found running. Candidates are taken from the '<vmid>.pid' files in
# the runtime directory; if that directory cannot be opened, the plain list
# is returned.
sub vzlist {
    my $vzlist = config_list();

    my $rundir = IO::Dir->new($PVE::QemuServer::Helpers::var_run_tmpdir);
    return $vzlist if !$rundir;

    while (defined(my $entry = $rundir->read)) {
        my ($vmid) = $entry =~ m/^(\d+)\.pid$/ or next;

        # ignore pid files of VMs that are not in the local list
        next if !defined($vzlist->{$vmid});

        my $pid = check_running($vmid) or next;
        $vzlist->{$vmid}->{pid} = $pid;
    }

    return $vzlist;
}
2790
# JSON schema describing the per-VM status properties. Only 'vmid' and
# 'status' are mandatory, everything else is optional. Declared with 'our',
# so it is accessible from other packages.
our $vmstatus_return_properties = {
    vmid => get_standard_option('pve-vmid'),
    status => {
        description => "Qemu process status.",
        type => 'string',
        enum => ['stopped', 'running'],
    },
    maxmem => {
        description => "Maximum memory in bytes.",
        type => 'integer',
        optional => 1,
        renderer => 'bytes',
    },
    maxdisk => {
        description => "Root disk size in bytes.",
        type => 'integer',
        optional => 1,
        renderer => 'bytes',
    },
    name => {
        description => "VM name.",
        type => 'string',
        optional => 1,
    },
    qmpstatus => {
        description => "Qemu QMP agent status.",
        type => 'string',
        optional => 1,
    },
    pid => {
        description => "PID of running qemu process.",
        type => 'integer',
        optional => 1,
    },
    uptime => {
        description => "Uptime.",
        type => 'integer',
        optional => 1,
        renderer => 'duration',
    },
    cpus => {
        description => "Maximum usable CPUs.",
        type => 'number',
        optional => 1,
    },
    lock => {
        description => "The current config lock, if any.",
        type => 'string',
        optional => 1,
    },
    tags => {
        description => "The current configured tags, if any",
        type => 'string',
        optional => 1,
    },
    # the two 'running-*' properties are only filled in by full status queries
    'running-machine' => {
        description => "The currently running machine type (if running).",
        type => 'string',
        optional => 1,
    },
    'running-qemu' => {
        description => "The currently running QEMU version (if running).",
        type => 'string',
        optional => 1,
    },
};
2857
# Per-PID cache of the last CPU accounting sample ({ time, used, cpu }), kept
# across vmstatus() calls so that CPU usage can be computed as a delta.
my $last_proc_pid_stat;

# get VM status information
# This must be fast and should not block ($full == false)
# We only query KVM using QMP if $full == true (this can be slow)
#
#  $opt_vmid - if set, only report this VM
#  $full     - if true, additionally query running VMs via QMP (block
#              statistics, machine type, QEMU version, balloon info, run state)
#
# Returns a hash reference mapping VMID to its status hash.
sub vmstatus {
    my ($opt_vmid, $full) = @_;

    my $res = {};

    my $storecfg = PVE::Storage::config();

    my $list = vzlist();
    my $defaults = load_defaults();

    my ($uptime) = PVE::ProcFSTools::read_proc_uptime(1);

    # NOTE(review): $cpuinfo is defined outside of this block; presumably the
    # host CPU information - confirm
    my $cpucount = $cpuinfo->{cpus} || 1;

    # first pass: static information taken from the configuration
    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);

        my $conf = PVE::QemuConfig->load_config($vmid);

        my $d = { vmid => int($vmid) };
        $d->{pid} = int($list->{$vmid}->{pid}) if $list->{$vmid}->{pid};

        # fixme: better status?
        $d->{status} = $list->{$vmid}->{pid} ? 'running' : 'stopped';

        my $size = PVE::QemuServer::Drive::bootdisk_size($storecfg, $conf);
        if (defined($size)) {
            $d->{disk} = 0; # no info available
            $d->{maxdisk} = $size;
        } else {
            $d->{disk} = 0;
            $d->{maxdisk} = 0;
        }

        # sockets * cores, capped at the host CPU count; an explicit 'vcpus'
        # setting takes precedence over both
        $d->{cpus} = ($conf->{sockets} || $defaults->{sockets})
            * ($conf->{cores} || $defaults->{cores});
        $d->{cpus} = $cpucount if $d->{cpus} > $cpucount;
        $d->{cpus} = $conf->{vcpus} if $conf->{vcpus};

        $d->{name} = $conf->{name} || "VM $vmid";
        # the configured memory value is scaled by 1024*1024 here
        $d->{maxmem} = $conf->{memory} ? $conf->{memory}*(1024*1024)
            : $defaults->{memory}*(1024*1024);

        if ($conf->{balloon}) {
            $d->{balloon_min} = $conf->{balloon}*(1024*1024);
            $d->{shares} = defined($conf->{shares}) ? $conf->{shares}
                : $defaults->{shares};
        }

        # dynamic values default to 0 and are filled in below if available
        $d->{uptime} = 0;
        $d->{cpu} = 0;
        $d->{mem} = 0;

        $d->{netout} = 0;
        $d->{netin} = 0;

        $d->{diskread} = 0;
        $d->{diskwrite} = 0;

        $d->{template} = 1 if PVE::QemuConfig->is_template($conf);

        $d->{serial} = 1 if conf_has_serial($conf);
        $d->{lock} = $conf->{lock} if $conf->{lock};
        $d->{tags} = $conf->{tags} if defined($conf->{tags});

        $res->{$vmid} = $d;
    }

    # second pass: network counters of the VMs' tap devices ('tap<vmid>i...')
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    foreach my $dev (keys %$netdev) {
        next if $dev !~ m/^tap([1-9]\d*)i/;
        my $vmid = $1;
        my $d = $res->{$vmid};
        next if !$d;

        # note the swapped mapping: the device's 'receive' counter is the VM's
        # 'netout' and 'transmit' its 'netin' (presumably because the counters
        # are from the host's point of view - confirm)
        $d->{netout} += $netdev->{$dev}->{receive};
        $d->{netin} += $netdev->{$dev}->{transmit};

        if ($full) {
            $d->{nics}->{$dev}->{netout} = int($netdev->{$dev}->{receive});
            $d->{nics}->{$dev}->{netin} = int($netdev->{$dev}->{transmit});
        }

    }

    my $ctime = gettimeofday;

    # third pass: uptime, memory and CPU usage from /proc for running VMs
    foreach my $vmid (keys %$list) {

        my $d = $res->{$vmid};
        my $pid = $d->{pid};
        next if !$pid;

        my $pstat = PVE::ProcFSTools::read_proc_pid_stat($pid);
        next if !$pstat; # not running

        my $used = $pstat->{utime} + $pstat->{stime};

        $d->{uptime} = int(($uptime - $pstat->{starttime})/$cpuinfo->{user_hz});

        # estimate: scale maxmem by the process' rss/vsize ratio
        if ($pstat->{vsize}) {
            $d->{mem} = int(($pstat->{rss}/$pstat->{vsize})*$d->{maxmem});
        }

        # the first sample for a PID only primes the cache, CPU usage stays 0
        my $old = $last_proc_pid_stat->{$pid};
        if (!$old) {
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($ctime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        # only compute a new value if enough time has passed since the cached
        # sample, otherwise report the cached value again
        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => $d->{cpu},
            };
        } else {
            $d->{cpu} = $old->{cpu};
        }
    }

    return $res if !$full;

    # everything below queries the running VMs via QMP
    my $qmpclient = PVE::QMPClient->new();

    # 'query-balloon': memory assignment and, if reported, guest memory usage
    my $ballooncb = sub {
        my ($vmid, $resp) = @_;

        my $info = $resp->{'return'};
        return if !$info->{max_mem};

        my $d = $res->{$vmid};

        # use memory assigned to VM
        $d->{maxmem} = $info->{max_mem};
        $d->{balloon} = $info->{actual};

        if (defined($info->{total_mem}) && defined($info->{free_mem})) {
            $d->{mem} = $info->{total_mem} - $info->{free_mem};
            $d->{freemem} = $info->{free_mem};
        }

        $d->{ballooninfo} = $info;
    };

    # 'query-blockstats': per-device statistics plus read/write totals
    my $blockstatscb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} || [];
        my $totalrdbytes = 0;
        my $totalwrbytes = 0;

        for my $blockstat (@$data) {
            $totalrdbytes = $totalrdbytes + $blockstat->{stats}->{rd_bytes};
            $totalwrbytes = $totalwrbytes + $blockstat->{stats}->{wr_bytes};

            # strip the 'drive-' part from the device name
            $blockstat->{device} =~ s/drive-//;
            $res->{$vmid}->{blockstat}->{$blockstat->{device}} = $blockstat->{stats};
        }
        $res->{$vmid}->{diskread} = $totalrdbytes;
        $res->{$vmid}->{diskwrite} = $totalwrbytes;
    };

    # 'query-machines': currently running machine type
    my $machinecb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} || [];

        $res->{$vmid}->{'running-machine'} =
            PVE::QemuServer::Machine::current_from_query_machines($data);
    };

    # 'query-version': running QEMU version as 'major.minor.micro'
    my $versioncb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} // {};
        my $version = 'unknown';

        if (my $v = $data->{qemu}) {
            $version = $v->{major} . "." . $v->{minor} . "." . $v->{micro};
        }

        $res->{$vmid}->{'running-qemu'} = $version;
    };

    # 'query-status': queues the follow-up queries and records the run state
    my $statuscb = sub {
        my ($vmid, $resp) = @_;

        $qmpclient->queue_cmd($vmid, $blockstatscb, 'query-blockstats');
        $qmpclient->queue_cmd($vmid, $machinecb, 'query-machines');
        $qmpclient->queue_cmd($vmid, $versioncb, 'query-version');
        # this fails if balloon driver is not loaded, so this must be
        # the last command (following commands are aborted if this fails).
        $qmpclient->queue_cmd($vmid, $ballooncb, 'query-balloon');

        my $status = 'unknown';
        if (!defined($status = $resp->{'return'}->{status})) {
            warn "unable to get VM status\n";
            return;
        }

        $res->{$vmid}->{qmpstatus} = $resp->{'return'}->{status};
    };

    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        next if !$res->{$vmid}->{pid}; # not running
        $qmpclient->queue_cmd($vmid, $statuscb, 'query-status');
    }

    $qmpclient->queue_execute(undef, 2);

    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        next if !$res->{$vmid}->{pid}; #not running

        # we can't use the $qmpclient since it might have already aborted on
        # 'query-balloon', but this might also fail for older versions...
        my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };
        $res->{$vmid}->{'proxmox-support'} = $qemu_support // {};
    }

    # fall back to the process status if no QMP status could be obtained
    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        $res->{$vmid}->{qmpstatus} = $res->{$vmid}->{status} if !$res->{$vmid}->{qmpstatus};
    }

    return $res;
}
3098
# Return 1 if any 'serialN' option (N below $MAX_SERIAL_PORTS) is set to a
# true value in $conf, 0 otherwise.
sub conf_has_serial {
    my ($conf) = @_;

    for my $port (0 .. $MAX_SERIAL_PORTS - 1) {
        return 1 if $conf->{"serial$port"};
    }

    return 0;
}
3110
# Describe the audio device 'audio$id' ($id defaults to 0) of a VM config.
# Returns nothing if the option is not set, otherwise a hash reference with
# the device model ('dev'), the backend driver ('backend', 'spice' if not
# configured) and the IDs to use for device and backend.
sub conf_has_audio {
    my ($conf, $id) = @_;

    $id //= 0;

    my $raw = $conf->{"audio$id"};
    return if !defined($raw);

    my $props = parse_property_string($audio_fmt, $raw);
    my $driver = $props->{driver} // 'spice';

    my $audio = {
        dev => $props->{device},
        dev_id => "audiodev$id",
        backend => $driver,
        backend_id => "$driver-backend${id}",
    };

    return $audio;
}
3128
# Build the QEMU command line arguments (array ref) for an audio device as
# described by conf_has_audio(): the '-device' entries for the emulated
# hardware followed by the '-audiodev' backend. Devices are only linked to the
# backend via 'audiodev=' for machine versions >= 4.2.
# Dies for device models other than 'AC97' and '*intel-hda'.
sub audio_devs {
    my ($audio, $audiopciaddr, $machine_version) = @_;

    my $id = $audio->{dev_id};
    my $audiodev = min_version($machine_version, 4, 2)
        ? ",audiodev=$audio->{backend_id}"
        : "";

    my @devs;
    if ($audio->{dev} eq 'AC97') {
        @devs = ('-device', "AC97,id=${id}${audiopciaddr}$audiodev");
    } elsif ($audio->{dev} =~ /intel\-hda$/) {
        # the HDA controller itself plus two codecs attached to its bus
        @devs = (
            '-device', "$audio->{dev},id=${id}${audiopciaddr}",
            '-device', "hda-micro,id=${id}-codec0,bus=${id}.0,cad=0$audiodev",
            '-device', "hda-duplex,id=${id}-codec1,bus=${id}.0,cad=1$audiodev",
        );
    } else {
        die "unkown audio device '$audio->{dev}', implement me!";
    }

    push @devs, '-audiodev', "$audio->{backend},id=$audio->{backend_id}";

    return \@devs;
}
3154
# Compute the runtime file paths used for the swtpm instance of a VM.
#
# $vmid - VM ID
#
# Returns a hash reference with:
#   socket - path of the swtpm control socket
#   pid    - path of the swtpm PID file
sub get_tpm_paths {
    my ($vmid) = @_;

    my $socket_path = "/var/run/qemu-server/$vmid.swtpm";

    return {
        socket => $socket_path,
        pid => "$socket_path.pid",
    };
}
3162
# Append the QEMU arguments for an emulated TPM to the device list.
#
# $vmid    - VM ID, selects the swtpm control socket to connect to
# $devices - array reference of device arguments, modified in place
# $conf    - VM configuration hash; nothing is added unless 'tpmstate0' is set
sub add_tpm_device {
    my ($vmid, $devices, $conf) = @_;

    return unless $conf->{tpmstate0};

    my $socket = get_tpm_paths($vmid)->{socket};

    # chardev for the swtpm control socket, TPM backend on top of it, and the
    # TIS frontend device that uses the backend
    push @$devices,
        "-chardev", "socket,id=tpmchar,path=$socket",
        "-tpmdev", "emulator,id=tpmdev,chardev=tpmchar",
        "-device", "tpm-tis,tpmdev=tpmdev";
}
3174
# Start the swtpm emulator daemon backing the TPM state of a VM.
#
# $storecfg  - storage configuration, used to map a storage volume
# $vmid      - VM ID, selects the control socket and PID file paths
# $tpmdrive  - raw 'tpmstate0' property string; nothing is done if it is false
# $migration - if true, swtpm_setup is skipped as the state comes from remote
#
# Returns the untainted PID of the swtpm daemon so it can be killed on error.
# Returns nothing if no TPM is configured or if no PID could be read from the
# PID file. Dies if the PID file does not show up in time.
sub start_swtpm {
    my ($storecfg, $vmid, $tpmdrive, $migration) = @_;

    return if !$tpmdrive;

    my $state;
    my $tpm = parse_drive("tpmstate0", $tpmdrive);
    my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
    if ($storeid) {
        $state = PVE::Storage::map_volume($storecfg, $tpm->{file});
    } else {
        # not a storage volume ID, use the configured value as-is
        $state = $tpm->{file};
    }

    my $paths = get_tpm_paths($vmid);

    # during migration, we will get state from remote
    #
    if (!$migration) {
        # run swtpm_setup to create a new TPM state if it doesn't exist yet
        my $setup_cmd = [
            "swtpm_setup",
            "--tpmstate",
            "file://$state",
            "--createek",
            "--create-ek-cert",
            "--create-platform-cert",
            "--lock-nvram",
            "--config",
            "/etc/swtpm_setup.conf", # do not use XDG configs
            "--runas",
            "0", # force creation as root, error if not possible
            "--not-overwrite", # ignore existing state, do not modify
        ];

        push @$setup_cmd, "--tpm2" if $tpm->{version} eq 'v2.0';
        # TPM 2.0 supports ECC crypto, use if possible
        push @$setup_cmd, "--ecc" if $tpm->{version} eq 'v2.0';

        # use the line handed to the callback instead of relying on whatever
        # $1 happens to contain while the callback runs
        run_command($setup_cmd, outfunc => sub {
            my ($line) = @_;
            print "swtpm_setup: $line\n";
        });
    }

    my $emulator_cmd = [
        "swtpm",
        "socket",
        "--tpmstate",
        "backend-uri=file://$state,mode=0600",
        "--ctrl",
        "type=unixio,path=$paths->{socket},mode=0600",
        "--pid",
        "file=$paths->{pid}",
        "--terminate", # terminate on QEMU disconnect
        "--daemon",
    ];
    push @$emulator_cmd, "--tpm2" if $tpm->{version} eq 'v2.0';
    run_command($emulator_cmd, outfunc => sub {
        my ($line) = @_;
        print $line;
    });

    my $tries = 100; # swtpm may take a bit to start before daemonizing, wait up to 5s for pid
    while (! -e $paths->{pid}) {
        die "failed to start swtpm: pid file '$paths->{pid}' wasn't created.\n" if --$tries == 0;
        usleep(50_000);
    }

    # return untainted PID of swtpm daemon so it can be killed on error
    my $pid_line = file_read_firstline($paths->{pid});
    if (defined($pid_line) && $pid_line =~ m/(\d+)/) {
        return $1;
    }

    # Never hand out $1 after a failed match: it would still hold the capture
    # of some earlier, unrelated match and the caller might kill that "PID".
    warn "could not read PID of swtpm from '$paths->{pid}'\n";
    return;
}
3244
# Tell whether a 'vga' property string selects a SPICE (qxl) display.
#
# $vga - raw 'vga' property string from the VM configuration
#
# Returns 0 if the display type is not 'qxl', 'qxl2', 'qxl3' or 'qxl4'.
# Otherwise returns the digit following 'qxl', or 1 for plain 'qxl'.
sub vga_conf_has_spice {
    my ($vga) = @_;

    my $parsed = parse_vga($vga);
    my $type = $parsed->{type};

    if ($type && $type =~ m/^qxl([234])?$/) {
        return $1 || 1;
    }

    return 0;
}
3254
# Check whether the given architecture is the architecture of this host.
#
# $arch - architecture name to compare against get_host_arch()
#
# Returns a true value if both are equal, a false value otherwise.
sub is_native($) {
    my ($arch) = @_;

    my $host_arch = get_host_arch();

    return $host_arch eq $arch;
}
3259
# Determine the architecture a VM is supposed to run with.
#
# $conf - VM configuration hash
#
# Returns the configured 'arch' if it is defined, the host architecture
# otherwise.
sub get_vm_arch {
    my ($conf) = @_;

    my $configured = $conf->{arch};
    return $configured if defined($configured);

    return get_host_arch();
}
3264
# Default machine type per architecture. Used by get_vm_machine when neither
# a forced machine nor the VM configuration provides one, and by
# query_supported_cpu_flags for its temporary VM.
my $default_machines = {
    x86_64 => 'pc',
    aarch64 => 'virt',
};
3269
# Extract the "major.minor" part of a QEMU version string.
#
# $kvmversion - version string, e.g. "7.1.0"; if undefined, the result of
#               kvm_user_version() is used instead
#
# Returns the leading "<digits>.<digits>" of the version, or undef if the
# version string does not start like that.
sub get_installed_machine_version {
    my ($kvmversion) = @_;

    $kvmversion = kvm_user_version() if !defined($kvmversion);

    # Capture into a lexical instead of returning $1: after a failed match $1
    # still holds the capture of some earlier, unrelated match.
    my ($version) = $kvmversion =~ m/^(\d+\.\d+)/;

    return $version;
}
3276
# Turn a generic machine type into a version-pinned one.
#
# $machine      - generic machine type ('pc', 'q35', 'virt') or a false value,
#                 which is treated like 'pc'
# $base_version - machine version to pin to; if it is undefined or cannot be
#                 run according to can_run_pve_machine_version(), the
#                 installed machine version is used instead
# $kvmversion   - QEMU version passed on to the version helpers
#
# Returns the pinned machine type, e.g. "pc-i440fx-<version>". Any other
# machine type is returned unchanged after emitting a warning.
sub windows_get_pinned_machine_version {
    my ($machine, $base_version, $kvmversion) = @_;

    my $base_usable = defined($base_version)
        && PVE::QemuServer::Machine::can_run_pve_machine_version($base_version, $kvmversion);

    my $pin_version = $base_usable
        ? $base_version
        : get_installed_machine_version($kvmversion);

    my $prefix_for = {
        pc => 'pc-i440fx',
        q35 => 'pc-q35',
        virt => 'virt',
    };

    my $prefix = $prefix_for->{$machine || 'pc'};
    if (!defined($prefix)) {
        warn "unknown machine type '$machine', not touching that!\n";
        return $machine;
    }

    return "${prefix}-${pin_version}";
}
3298
# Determine the machine type a VM is started with.
#
# $conf            - VM configuration hash ('machine' and 'ostype' are read)
# $forcemachine    - machine type overriding the configured one, if true
# $arch            - architecture used to select the default machine type,
#                    defaults to 'x86_64'
# $add_pve_version - if true, make sure the result carries a '+pve<N>' suffix
# $kvmversion      - QEMU version, defaults to kvm_user_version() where needed
#
# Returns the machine type string.
sub get_vm_machine {
    my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_;

    my $type = $forcemachine || $conf->{machine};

    my $is_unversioned = !$type || $type =~ m/^(?:pc|q35|virt)$/;
    if ($is_unversioned) {
        $kvmversion //= kvm_user_version();

        # we must pin Windows VMs without a specific version to 5.1, as 5.2 fixed a bug in ACPI
        # layout which confuses windows quite a bit and may result in various regressions..
        # see: https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg08484.html
        $type = windows_get_pinned_machine_version($type, '5.1', $kvmversion)
            if windows_version($conf->{ostype});

        $arch //= 'x86_64';
        $type ||= $default_machines->{$arch};

        $type .= '+pve' . PVE::QemuServer::Machine::get_pve_version($kvmversion)
            if $add_pve_version;
    }

    # done if no pve-version is wanted or the type already carries one
    return $type if !$add_pve_version || $type =~ m/\+pve\d+?(?:\.pxe)?$/;

    # for version-pinned machines that do not include a pve-version (e.g.
    # pc-q35-4.1), we assume 0 to keep them stable in case we bump; a '.pxe'
    # suffix has to stay at the very end
    if ($type =~ m/^(.*?)\.pxe$/) {
        return $1 . '+pve0' . '.pxe';
    }

    return $type . '+pve0';
}
3333
# Select the OVMF firmware images matching an architecture and EFI disk.
#
# $arch    - architecture, used as key into the $OVMF table
# $efidisk - parsed efidisk drive hash (may be undef); 'efitype' and
#            'pre-enrolled-keys' are read
# $smm     - if true, the SMM variant of the 4m images is selected
#
# Image type selection: 'default' unless efitype is '4m'; then '4m' or
# '4m-no-smm' depending on $smm, with '-ms' appended if pre-enrolled keys are
# requested.
#
# Returns the list stored for that image type (config_to_command unpacks it
# as code image path followed by vars image path).
# Dies if the architecture or the image type is not known.
sub get_ovmf_files($$$) {
    my ($arch, $efidisk, $smm) = @_;

    my $types = $OVMF->{$arch}
        or die "no OVMF images known for architecture '$arch'\n";

    my $type = 'default';
    if (defined($efidisk->{efitype}) && $efidisk->{efitype} eq '4m') {
        $type = $smm ? "4m" : "4m-no-smm";
        $type .= '-ms' if $efidisk->{'pre-enrolled-keys'};
    }

    # fail with a meaningful message instead of dereferencing undef
    my $files = $types->{$type}
        or die "no OVMF images of type '$type' known for architecture '$arch'\n";

    return $files->@*;
}
3348
# QEMU system emulator binary per architecture, used for non-native guests.
my $Arch2Qemu = {
    aarch64 => '/usr/bin/qemu-system-aarch64',
    x86_64 => '/usr/bin/qemu-system-x86_64',
};

# Select the binary used to run a VM of the given architecture.
#
# $arch - guest architecture
#
# Returns '/usr/bin/kvm' if the architecture is native to this host, the
# matching entry of $Arch2Qemu otherwise. Dies if there is no such entry.
sub get_command_for_arch($) {
    my ($arch) = @_;

    if (is_native($arch)) {
        return '/usr/bin/kvm';
    }

    my $emulator = $Arch2Qemu->{$arch};
    die "don't know how to emulate architecture '$arch'\n" if !$emulator;

    return $emulator;
}
3361
# Query the CPU flags supported on this host, separately for KVM and TCG.
#
# To get flags usable on a QEMU command line (-cpu element), array_intersect
# the result of this function with that of query_understood_cpu_flags. Both
# are needed because:
#
# a) query_understood_ returns flags the host cannot use and
# b) query_supported_ (rather the QMP call) doesn't actually return CPU
#    flags, but CPU settings - with most of them being flags. Those settings
#    (and some flags, curiously) cannot be specified as a "-cpu" argument.
#
# This function needs to start up to 2 temporary VMs and is therefore rather
# expensive. If you need the value returned from this, you can get it much
# cheaper from pmxcfs using PVE::Cluster::get_node_kv('cpuflags-$accel') with
# $accel being 'kvm' or 'tcg'.
#
# pvestatd calls this function on startup and whenever the QEMU/KVM version
# changes, automatically populating pmxcfs.
#
# $arch - architecture to query, defaults to the host architecture
#
# Returns: { kvm => [ flagX, flagY, ... ], tcg => [ flag1, flag2, ... ] }
# since kvm and tcg machines support different flags. The 'kvm' entry is only
# set if KVM is available; an entry is undef if its query failed.
#
sub query_supported_cpu_flags {
    my ($arch) = @_;

    $arch //= get_host_arch();
    my $machine = $default_machines->{$arch};

    # FIXME: Once this is merged, the code below should work for ARM as well:
    # https://lists.nongnu.org/archive/html/qemu-devel/2019-06/msg04947.html
    die "QEMU/KVM cannot detect CPU flags on ARM (aarch64)\n"
        if $arch eq "aarch64";

    my $have_kvm = defined(kvm_version());
    my $binary = get_command_for_arch($arch);
    my $fakevmid = -1;
    my $pidfile = PVE::QemuServer::Helpers::pidfile_name($fakevmid);

    # Starts a temporary (frozen) VM with vmid -1, asks it via QMP for the
    # expansion of the 'host' CPU model and stops it again. Returns a sorted
    # array reference of flag names.
    my $query_flags = sub {
        my ($use_kvm) = @_;

        my @qemu_cmd = (
            $binary,
            '-machine', $machine,
            '-display', 'none',
            '-chardev', "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server=on,wait=off",
            '-mon', 'chardev=qmp,mode=control',
            '-pidfile', $pidfile,
            '-S', '-daemonize',
            $use_kvm ? () : ('-accel', 'tcg'),
        );

        my $exit_code = run_command(\@qemu_cmd, noerr => 1, quiet => 0);
        die "QEMU flag querying VM exited with code " . $exit_code if $exit_code;

        my %seen;
        eval {
            my $expansion = mon_cmd(
                $fakevmid,
                'query-cpu-model-expansion',
                type => 'full',
                model => { name => 'host' }
            );

            my $props = $expansion->{model}->{props};
            for my $name (keys %$props) {
                next if $props->{$name} ne '1';
                # QEMU returns some flags multiple times, with '_', '.' or '-'
                # (e.g. lahf_lm and lahf-lm; sse4.2, sse4-2 and sse4_2; ...).
                # We only keep those with underscores, to match /proc/cpuinfo
                (my $flag = $name) =~ s/\.|-/_/g;
                $seen{$flag} = 1;
            }
        };
        my $err = $@;

        # force stop with 10 sec timeout and 'nocheck', always stop, even if QMP failed
        vm_stop(undef, $fakevmid, 1, 1, 10, 0, 1);

        die $err if $err;

        return [ sort keys %seen ];
    };

    my $flags = {};

    # We need to query QEMU twice, since KVM and TCG have different supported flags
    PVE::QemuConfig->lock_config($fakevmid, sub {
        for my $accel ('tcg', $have_kvm ? 'kvm' : ()) {
            $flags->{$accel} = eval { $query_flags->($accel eq 'kvm' ? 1 : 0) };
            warn "warning: failed querying supported $accel flags: $@\n" if $@;
        }
    });

    return $flags;
}
3463
# Understood CPU flags are written to a file at 'pve-qemu' compile time
my $understood_cpu_flag_dir = "/usr/share/kvm";

# Read the list of CPU flags QEMU understands for the host architecture.
#
# Returns an array reference with the whitespace-separated entries of the
# 'recognized-CPUID-flags-<arch>' file. Dies if that file does not exist.
sub query_understood_cpu_flags {
    my $arch = get_host_arch();
    my $filepath = "$understood_cpu_flag_dir/recognized-CPUID-flags-$arch";

    if (! -e $filepath) {
        die "Cannot query understood QEMU CPU flags for architecture: $arch (file not found)\n";
    }

    my $content = file_get_contents($filepath);

    # leading whitespace yields one empty leading field, drop it
    my @flags = grep { length($_) } split(/\s+/, $content);

    return \@flags;
}
3479
# Since commit 277d33454f77ec1d1e0bc04e37621e4dd2424b67 in pve-qemu, smm is not off by default
# anymore. But smm=off seems to be required when using SeaBIOS and serial display.
#
# $conf - VM configuration hash; an unset 'bios' counts as 'seabios'
# $vga  - parsed vga hash; its 'type' has to be 'serial<N>' or 'none'
#
# Returns a true value if both conditions hold, a false value otherwise.
my sub should_disable_smm {
    my ($conf, $vga) = @_;

    my $bios = $conf->{bios};
    my $seabios_in_use = !defined($bios) || $bios eq 'seabios';

    return $seabios_in_use && $vga->{type} && $vga->{type} =~ m/^(serial\d+|none)$/;
}
3488
3489 sub config_to_command {
3490 my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu,
3491 $pbs_backing) = @_;
3492
3493 my $cmd = [];
3494 my ($globalFlags, $machineFlags, $rtcFlags) = ([], [], []);
3495 my $devices = [];
3496 my $bridges = {};
3497 my $ostype = $conf->{ostype};
3498 my $winversion = windows_version($ostype);
3499 my $kvm = $conf->{kvm};
3500 my $nodename = nodename();
3501
3502 my $arch = get_vm_arch($conf);
3503 my $kvm_binary = get_command_for_arch($arch);
3504 my $kvmver = kvm_user_version($kvm_binary);
3505
3506 if (!$kvmver || $kvmver !~ m/^(\d+)\.(\d+)/ || $1 < 3) {
3507 $kvmver //= "undefined";
3508 die "Detected old QEMU binary ('$kvmver', at least 3.0 is required)\n";
3509 }
3510
3511 my $add_pve_version = min_version($kvmver, 4, 1);
3512
3513 my $machine_type = get_vm_machine($conf, $forcemachine, $arch, $add_pve_version);
3514 my $machine_version = extract_version($machine_type, $kvmver);
3515 $kvm //= 1 if is_native($arch);
3516
3517 $machine_version =~ m/(\d+)\.(\d+)/;
3518 my ($machine_major, $machine_minor) = ($1, $2);
3519
3520 if ($kvmver =~ m/^\d+\.\d+\.(\d+)/ && $1 >= 90) {
3521 warn "warning: Installed QEMU version ($kvmver) is a release candidate, ignoring version checks\n";
3522 } elsif (!min_version($kvmver, $machine_major, $machine_minor)) {
3523 die "Installed QEMU version '$kvmver' is too old to run machine type '$machine_type',"
3524 ." please upgrade node '$nodename'\n"
3525 } elsif (!PVE::QemuServer::Machine::can_run_pve_machine_version($machine_version, $kvmver)) {
3526 my $max_pve_version = PVE::QemuServer::Machine::get_pve_version($machine_version);
3527 die "Installed qemu-server (max feature level for $machine_major.$machine_minor is"
3528 ." pve$max_pve_version) is too old to run machine type '$machine_type', please upgrade"
3529 ." node '$nodename'\n";
3530 }
3531
3532 # if a specific +pve version is required for a feature, use $version_guard
3533 # instead of min_version to allow machines to be run with the minimum
3534 # required version
3535 my $required_pve_version = 0;
3536 my $version_guard = sub {
3537 my ($major, $minor, $pve) = @_;
3538 return 0 if !min_version($machine_version, $major, $minor, $pve);
3539 my $max_pve = PVE::QemuServer::Machine::get_pve_version("$major.$minor");
3540 return 1 if min_version($machine_version, $major, $minor, $max_pve+1);
3541 $required_pve_version = $pve if $pve && $pve > $required_pve_version;
3542 return 1;
3543 };
3544
3545 if ($kvm && !defined kvm_version()) {
3546 die "KVM virtualisation configured, but not available. Either disable in VM configuration"
3547 ." or enable in BIOS.\n";
3548 }
3549
3550 my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
3551 my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
3552 my $use_old_bios_files = undef;
3553 ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);
3554
3555 if ($conf->{affinity}) {
3556 push @$cmd, "/usr/bin/taskset";
3557 push @$cmd, "--cpu-list";
3558 push @$cmd, "--all-tasks";
3559 push @$cmd, $conf->{affinity};
3560 }
3561
3562 push @$cmd, $kvm_binary;
3563
3564 push @$cmd, '-id', $vmid;
3565
3566 my $vmname = $conf->{name} || "vm$vmid";
3567
3568 push @$cmd, '-name', "$vmname,debug-threads=on";
3569
3570 push @$cmd, '-no-shutdown';
3571
3572 my $use_virtio = 0;
3573
3574 my $qmpsocket = PVE::QemuServer::Helpers::qmp_socket($vmid);
3575 push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server=on,wait=off";
3576 push @$cmd, '-mon', "chardev=qmp,mode=control";
3577
3578 if (min_version($machine_version, 2, 12)) {
3579 push @$cmd, '-chardev', "socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5";
3580 push @$cmd, '-mon', "chardev=qmp-event,mode=control";
3581 }
3582
3583 push @$cmd, '-pidfile' , PVE::QemuServer::Helpers::pidfile_name($vmid);
3584
3585 push @$cmd, '-daemonize';
3586
3587 if ($conf->{smbios1}) {
3588 my $smbios_conf = parse_smbios1($conf->{smbios1});
3589 if ($smbios_conf->{base64}) {
3590 # Do not pass base64 flag to qemu
3591 delete $smbios_conf->{base64};
3592 my $smbios_string = "";
3593 foreach my $key (keys %$smbios_conf) {
3594 my $value;
3595 if ($key eq "uuid") {
3596 $value = $smbios_conf->{uuid}
3597 } else {
3598 $value = decode_base64($smbios_conf->{$key});
3599 }
3600 # qemu accepts any binary data, only commas need escaping by double comma
3601 $value =~ s/,/,,/g;
3602 $smbios_string .= "," . $key . "=" . $value if $value;
3603 }
3604 push @$cmd, '-smbios', "type=1" . $smbios_string;
3605 } else {
3606 push @$cmd, '-smbios', "type=1,$conf->{smbios1}";
3607 }
3608 }
3609
3610 if ($conf->{bios} && $conf->{bios} eq 'ovmf') {
3611 my $d;
3612 if (my $efidisk = $conf->{efidisk0}) {
3613 $d = parse_drive('efidisk0', $efidisk);
3614 }
3615
3616 my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch, $d, $q35);
3617 die "uefi base image '$ovmf_code' not found\n" if ! -f $ovmf_code;
3618
3619 my ($path, $format);
3620 my $read_only_str = '';
3621 if ($d) {
3622 my ($storeid, $volname) = PVE::Storage::parse_volume_id($d->{file}, 1);
3623 $format = $d->{format};
3624 if ($storeid) {
3625 $path = PVE::Storage::path($storecfg, $d->{file});
3626 if (!defined($format)) {
3627 my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
3628 $format = qemu_img_format($scfg, $volname);
3629 }
3630 } else {
3631 $path = $d->{file};
3632 die "efidisk format must be specified\n"
3633 if !defined($format);
3634 }
3635
3636 $read_only_str = ',readonly=on' if drive_is_read_only($conf, $d);
3637 } else {
3638 log_warn("no efidisk configured! Using temporary efivars disk.");
3639 $path = "/tmp/$vmid-ovmf.fd";
3640 PVE::Tools::file_copy($ovmf_vars, $path, -s $ovmf_vars);
3641 $format = 'raw';
3642 }
3643
3644 my $size_str = "";
3645
3646 if ($format eq 'raw' && $version_guard->(4, 1, 2)) {
3647 $size_str = ",size=" . (-s $ovmf_vars);
3648 }
3649
3650 # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
3651 my $cache = "";
3652 if ($path =~ m/^rbd:/) {
3653 $cache = ',cache=writeback';
3654 $path .= ':rbd_cache_policy=writeback'; # avoid write-around, we *need* to cache writes too
3655 }
3656
3657 push @$cmd, '-drive', "if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code";
3658 push @$cmd, '-drive', "if=pflash,unit=1$cache,format=$format,id=drive-efidisk0$size_str,file=${path}${read_only_str}";
3659 }
3660
3661 if ($q35) { # tell QEMU to load q35 config early
3662 # we use different pcie-port hardware for qemu >= 4.0 for passthrough
3663 if (min_version($machine_version, 4, 0)) {
3664 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35-4.0.cfg';
3665 } else {
3666 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35.cfg';
3667 }
3668 }
3669
3670 if (defined(my $fixups = qemu_created_version_fixups($conf, $forcemachine, $kvmver))) {
3671 push @$cmd, $fixups->@*;
3672 }
3673
3674 if ($conf->{vmgenid}) {
3675 push @$devices, '-device', 'vmgenid,guid='.$conf->{vmgenid};
3676 }
3677
3678 # add usb controllers
3679 my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers(
3680 $conf, $bridges, $arch, $machine_type, $usbdesc->{format}, $MAX_USB_DEVICES);
3681 push @$devices, @usbcontrollers if @usbcontrollers;
3682 my $vga = parse_vga($conf->{vga});
3683
3684 my $qxlnum = vga_conf_has_spice($conf->{vga});
3685 $vga->{type} = 'qxl' if $qxlnum;
3686
3687 if (!$vga->{type}) {
3688 if ($arch eq 'aarch64') {
3689 $vga->{type} = 'virtio';
3690 } elsif (min_version($machine_version, 2, 9)) {
3691 $vga->{type} = (!$winversion || $winversion >= 6) ? 'std' : 'cirrus';
3692 } else {
3693 $vga->{type} = ($winversion >= 6) ? 'std' : 'cirrus';
3694 }
3695 }
3696
3697 # enable absolute mouse coordinates (needed by vnc)
3698 my $tablet = $conf->{tablet};
3699 if (!defined($tablet)) {
3700 $tablet = $defaults->{tablet};
3701 $tablet = 0 if $qxlnum; # disable for spice because it is not needed
3702 $tablet = 0 if $vga->{type} =~ m/^serial\d+$/; # disable if we use serial terminal (no vga card)
3703 }
3704
3705 if ($tablet) {
3706 push @$devices, '-device', print_tabletdevice_full($conf, $arch) if $tablet;
3707 my $kbd = print_keyboarddevice_full($conf, $arch);
3708 push @$devices, '-device', $kbd if defined($kbd);
3709 }
3710
3711 my $bootorder = device_bootorder($conf);
3712
3713 # host pci device passthrough
3714 my ($kvm_off, $gpu_passthrough, $legacy_igd) = PVE::QemuServer::PCI::print_hostpci_devices(
3715 $vmid, $conf, $devices, $vga, $winversion, $q35, $bridges, $arch, $machine_type, $bootorder);
3716
3717 # usb devices
3718 my $usb_dev_features = {};
3719 $usb_dev_features->{spice_usb3} = 1 if min_version($machine_version, 4, 0);
3720
3721 my @usbdevices = PVE::QemuServer::USB::get_usb_devices(
3722 $conf, $usbdesc->{format}, $MAX_USB_DEVICES, $usb_dev_features, $bootorder);
3723 push @$devices, @usbdevices if @usbdevices;
3724
3725 # serial devices
3726 for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
3727 my $path = $conf->{"serial$i"} or next;
3728 if ($path eq 'socket') {
3729 my $socket = "/var/run/qemu-server/${vmid}.serial$i";
3730 push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server=on,wait=off";
3731 # On aarch64, serial0 is the UART device. Qemu only allows
3732 # connecting UART devices via the '-serial' command line, as
3733 # the device has a fixed slot on the hardware...
3734 if ($arch eq 'aarch64' && $i == 0) {
3735 push @$devices, '-serial', "chardev:serial$i";
3736 } else {
3737 push @$devices, '-device', "isa-serial,chardev=serial$i";
3738 }
3739 } else {
3740 die "no such serial device\n" if ! -c $path;
3741 push @$devices, '-chardev', "tty,id=serial$i,path=$path";
3742 push @$devices, '-device', "isa-serial,chardev=serial$i";
3743 }
3744 }
3745
3746 # parallel devices
3747 for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
3748 if (my $path = $conf->{"parallel$i"}) {
3749 die "no such parallel device\n" if ! -c $path;
3750 my $devtype = $path =~ m!^/dev/usb/lp! ? 'tty' : 'parport';
3751 push @$devices, '-chardev', "$devtype,id=parallel$i,path=$path";
3752 push @$devices, '-device', "isa-parallel,chardev=parallel$i";
3753 }
3754 }
3755
3756 if (min_version($machine_version, 4, 0) && (my $audio = conf_has_audio($conf))) {
3757 my $audiopciaddr = print_pci_addr("audio0", $bridges, $arch, $machine_type);
3758 my $audio_devs = audio_devs($audio, $audiopciaddr, $machine_version);
3759 push @$devices, @$audio_devs;
3760 }
3761
3762 add_tpm_device($vmid, $devices, $conf);
3763
3764 my $sockets = 1;
3765 $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused
3766 $sockets = $conf->{sockets} if $conf->{sockets};
3767
3768 my $cores = $conf->{cores} || 1;
3769
3770 my $maxcpus = $sockets * $cores;
3771
3772 my $vcpus = $conf->{vcpus} ? $conf->{vcpus} : $maxcpus;
3773
3774 my $allowed_vcpus = $cpuinfo->{cpus};
3775
3776 die "MAX $allowed_vcpus vcpus allowed per VM on this node\n" if ($allowed_vcpus < $maxcpus);
3777
3778 if ($hotplug_features->{cpu} && min_version($machine_version, 2, 7)) {
3779 push @$cmd, '-smp', "1,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3780 for (my $i = 2; $i <= $vcpus; $i++) {
3781 my $cpustr = print_cpu_device($conf,$i);
3782 push @$cmd, '-device', $cpustr;
3783 }
3784
3785 } else {
3786
3787 push @$cmd, '-smp', "$vcpus,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3788 }
3789 push @$cmd, '-nodefaults';
3790
3791 push @$cmd, '-boot', "menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg";
3792
3793 push @$cmd, '-no-acpi' if defined($conf->{acpi}) && $conf->{acpi} == 0;
3794
3795 push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;
3796
3797 if ($vga->{type} && $vga->{type} !~ m/^serial\d+$/ && $vga->{type} ne 'none'){
3798 push @$devices, '-device', print_vga_device(
3799 $conf, $vga, $arch, $machine_version, $machine_type, undef, $qxlnum, $bridges);
3800
3801 push @$cmd, '-display', 'egl-headless,gl=core' if $vga->{type} eq 'virtio-gl'; # VIRGL
3802
3803 my $socket = PVE::QemuServer::Helpers::vnc_socket($vmid);
3804 push @$cmd, '-vnc', "unix:$socket,password=on";
3805 } else {
3806 push @$cmd, '-vga', 'none' if $vga->{type} eq 'none';
3807 push @$cmd, '-nographic';
3808 }
3809
3810 # time drift fix
3811 my $tdf = defined($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};
3812 my $useLocaltime = $conf->{localtime};
3813
3814 if ($winversion >= 5) { # windows
3815 $useLocaltime = 1 if !defined($conf->{localtime});
3816
3817 # use time drift fix when acpi is enabled
3818 if (!(defined($conf->{acpi}) && $conf->{acpi} == 0)) {
3819 $tdf = 1 if !defined($conf->{tdf});
3820 }
3821 }
3822
3823 if ($winversion >= 6) {
3824 push @$globalFlags, 'kvm-pit.lost_tick_policy=discard';
3825 push @$cmd, '-no-hpet';
3826 }
3827
3828 push @$rtcFlags, 'driftfix=slew' if $tdf;
3829
3830 if ($conf->{startdate} && $conf->{startdate} ne 'now') {
3831 push @$rtcFlags, "base=$conf->{startdate}";
3832 } elsif ($useLocaltime) {
3833 push @$rtcFlags, 'base=localtime';
3834 }
3835
3836 if ($forcecpu) {
3837 push @$cmd, '-cpu', $forcecpu;
3838 } else {
3839 push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
3840 }
3841
3842 PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd);
3843
3844 push @$cmd, '-S' if $conf->{freeze};
3845
3846 push @$cmd, '-k', $conf->{keyboard} if defined($conf->{keyboard});
3847
3848 my $guest_agent = parse_guest_agent($conf);
3849
3850 if ($guest_agent->{enabled}) {
3851 my $qgasocket = PVE::QemuServer::Helpers::qmp_socket($vmid, 1);
3852 push @$devices, '-chardev', "socket,path=$qgasocket,server=on,wait=off,id=qga0";
3853
3854 if (!$guest_agent->{type} || $guest_agent->{type} eq 'virtio') {
3855 my $pciaddr = print_pci_addr("qga0", $bridges, $arch, $machine_type);
3856 push @$devices, '-device', "virtio-serial,id=qga0$pciaddr";
3857 push @$devices, '-device', 'virtserialport,chardev=qga0,name=org.qemu.guest_agent.0';
3858 } elsif ($guest_agent->{type} eq 'isa') {
3859 push @$devices, '-device', "isa-serial,chardev=qga0";
3860 }
3861 }
3862
3863 my $rng = $conf->{rng0} ? parse_rng($conf->{rng0}) : undef;
3864 if ($rng && $version_guard->(4, 1, 2)) {
3865 check_rng_source($rng->{source});
3866
3867 my $max_bytes = $rng->{max_bytes} // $rng_fmt->{max_bytes}->{default};
3868 my $period = $rng->{period} // $rng_fmt->{period}->{default};
3869 my $limiter_str = "";
3870 if ($max_bytes) {
3871 $limiter_str = ",max-bytes=$max_bytes,period=$period";
3872 }
3873
3874 my $rng_addr = print_pci_addr("rng0", $bridges, $arch, $machine_type);
3875 push @$devices, '-object', "rng-random,filename=$rng->{source},id=rng0";
3876 push @$devices, '-device', "virtio-rng-pci,rng=rng0$limiter_str$rng_addr";
3877 }
3878
3879 my $spice_port;
3880
3881 if ($qxlnum || $vga->{type} =~ /^virtio/) {
3882 if ($qxlnum > 1) {
3883 if ($winversion){
3884 for (my $i = 1; $i < $qxlnum; $i++){
3885 push @$devices, '-device', print_vga_device(
3886 $conf, $vga, $arch, $machine_version, $machine_type, $i, $qxlnum, $bridges);
3887 }
3888 } else {
3889 # assume other OS works like Linux
3890 my ($ram, $vram) = ("134217728", "67108864");
3891 if ($vga->{memory}) {
3892 $ram = PVE::Tools::convert_size($qxlnum*4*$vga->{memory}, 'mb' => 'b');
3893 $vram = PVE::Tools::convert_size($qxlnum*2*$vga->{memory}, 'mb' => 'b');
3894 }
3895 push @$cmd, '-global', "qxl-vga.ram_size=$ram";
3896 push @$cmd, '-global', "qxl-vga.vram_size=$vram";
3897 }
3898 }
3899
3900 my $pciaddr = print_pci_addr("spice", $bridges, $arch, $machine_type);
3901
3902 my $pfamily = PVE::Tools::get_host_address_family($nodename);
3903 my @nodeaddrs = PVE::Tools::getaddrinfo_all('localhost', family => $pfamily);
3904 die "failed to get an ip address of type $pfamily for 'localhost'\n" if !@nodeaddrs;
3905
3906 push @$devices, '-device', "virtio-serial,id=spice$pciaddr";
3907 push @$devices, '-chardev', "spicevmc,id=vdagent,name=vdagent";
3908 push @$devices, '-device', "virtserialport,chardev=vdagent,name=com.redhat.spice.0";
3909
3910 my $localhost = PVE::Network::addr_to_ip($nodeaddrs[0]->{addr});
3911 $spice_port = PVE::Tools::next_spice_port($pfamily, $localhost);
3912
3913 my $spice_enhancement_str = $conf->{spice_enhancements} // '';
3914 my $spice_enhancement = parse_property_string($spice_enhancements_fmt, $spice_enhancement_str);
3915 if ($spice_enhancement->{foldersharing}) {
3916 push @$devices, '-chardev', "spiceport,id=foldershare,name=org.spice-space.webdav.0";
3917 push @$devices, '-device', "virtserialport,chardev=foldershare,name=org.spice-space.webdav.0";
3918 }
3919
3920 my $spice_opts = "tls-port=${spice_port},addr=$localhost,tls-ciphers=HIGH,seamless-migration=on";
3921 $spice_opts .= ",streaming-video=$spice_enhancement->{videostreaming}"
3922 if $spice_enhancement->{videostreaming};
3923
3924 push @$devices, '-spice', "$spice_opts";
3925 }
3926
3927 # enable balloon by default, unless explicitly disabled
3928 if (!defined($conf->{balloon}) || $conf->{balloon}) {
3929 my $pciaddr = print_pci_addr("balloon0", $bridges, $arch, $machine_type);
3930 my $ballooncmd = "virtio-balloon-pci,id=balloon0$pciaddr";
3931 $ballooncmd .= ",free-page-reporting=on" if min_version($machine_version, 6, 2);
3932 push @$devices, '-device', $ballooncmd;
3933 }
3934
3935 if ($conf->{watchdog}) {
3936 my $wdopts = parse_watchdog($conf->{watchdog});
3937 my $pciaddr = print_pci_addr("watchdog", $bridges, $arch, $machine_type);
3938 my $watchdog = $wdopts->{model} || 'i6300esb';
3939 push @$devices, '-device', "$watchdog$pciaddr";
3940 push @$devices, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
3941 }
3942
3943 my $vollist = [];
3944 my $scsicontroller = {};
3945 my $ahcicontroller = {};
3946 my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : $defaults->{scsihw};
3947
3948 # Add iscsi initiator name if available
3949 if (my $initiator = get_initiator_name()) {
3950 push @$devices, '-iscsi', "initiator-name=$initiator";
3951 }
3952
3953 PVE::QemuConfig->foreach_volume($conf, sub {
3954 my ($ds, $drive) = @_;
3955
3956 if (PVE::Storage::parse_volume_id($drive->{file}, 1)) {
3957 check_volume_storage_type($storecfg, $drive->{file});
3958 push @$vollist, $drive->{file};
3959 }
3960
3961 # ignore efidisk here, already added in bios/fw handling code above
3962 return if $drive->{interface} eq 'efidisk';
3963 # similar for TPM
3964 return if $drive->{interface} eq 'tpmstate';
3965
3966 $use_virtio = 1 if $ds =~ m/^virtio/;
3967
3968 $drive->{bootindex} = $bootorder->{$ds} if $bootorder->{$ds};
3969
3970 if ($drive->{interface} eq 'virtio'){
3971 push @$cmd, '-object', "iothread,id=iothread-$ds" if $drive->{iothread};
3972 }
3973
3974 if ($drive->{interface} eq 'scsi') {
3975
3976 my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
3977
3978 die "scsi$drive->{index}: machine version 4.1~pve2 or higher is required to use more than 14 SCSI disks\n"
3979 if $drive->{index} > 13 && !&$version_guard(4, 1, 2);
3980
3981 my $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges, $arch, $machine_type);
3982 my $scsihw_type = $scsihw =~ m/^virtio-scsi-single/ ? "virtio-scsi-pci" : $scsihw;
3983
3984 my $iothread = '';
3985 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{iothread}){
3986 $iothread .= ",iothread=iothread-$controller_prefix$controller";
3987 push @$cmd, '-object', "iothread,id=iothread-$controller_prefix$controller";
3988 } elsif ($drive->{iothread}) {
3989 log_warn(
3990 "iothread is only valid with virtio disk or virtio-scsi-single controller, ignoring\n"
3991 );
3992 }
3993
3994 my $queues = '';
3995 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{queues}){
3996 $queues = ",num_queues=$drive->{queues}";
3997 }
3998
3999 push @$devices, '-device', "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues"
4000 if !$scsicontroller->{$controller};
4001 $scsicontroller->{$controller}=1;
4002 }
4003
4004 if ($drive->{interface} eq 'sata') {
4005 my $controller = int($drive->{index} / $PVE::QemuServer::Drive::MAX_SATA_DISKS);
4006 my $pciaddr = print_pci_addr("ahci$controller", $bridges, $arch, $machine_type);
4007 push @$devices, '-device', "ahci,id=ahci$controller,multifunction=on$pciaddr"
4008 if !$ahcicontroller->{$controller};
4009 $ahcicontroller->{$controller}=1;
4010 }
4011
4012 my $pbs_conf = $pbs_backing->{$ds};
4013 my $pbs_name = undef;
4014 if ($pbs_conf) {
4015 $pbs_name = "drive-$ds-pbs";
4016 push @$devices, '-blockdev', print_pbs_blockdev($pbs_conf, $pbs_name);
4017 }
4018
4019 my $drive_cmd = print_drive_commandline_full(
4020 $storecfg, $vmid, $drive, $pbs_name, min_version($kvmver, 6, 0));
4021
4022 # extra protection for templates, but SATA and IDE don't support it..
4023 $drive_cmd .= ',readonly=on' if drive_is_read_only($conf, $drive);
4024
4025 push @$devices, '-drive',$drive_cmd;
4026 push @$devices, '-device', print_drivedevice_full(
4027 $storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type);
4028 });
4029
4030 for (my $i = 0; $i < $MAX_NETS; $i++) {
4031 my $netname = "net$i";
4032
4033 next if !$conf->{$netname};
4034 my $d = parse_net($conf->{$netname});
4035 next if !$d;
4036
4037 $use_virtio = 1 if $d->{model} eq 'virtio';
4038
4039 $d->{bootindex} = $bootorder->{$netname} if $bootorder->{$netname};
4040
4041 my $netdevfull = print_netdev_full($vmid, $conf, $arch, $d, $netname);
4042 push @$devices, '-netdev', $netdevfull;
4043
4044 my $netdevicefull = print_netdevice_full(
4045 $vmid, $conf, $d, $netname, $bridges, $use_old_bios_files, $arch, $machine_type);
4046
4047 push @$devices, '-device', $netdevicefull;
4048 }
4049
4050 if ($conf->{ivshmem}) {
4051 my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
4052
4053 my $bus;
4054 if ($q35) {
4055 $bus = print_pcie_addr("ivshmem");
4056 } else {
4057 $bus = print_pci_addr("ivshmem", $bridges, $arch, $machine_type);
4058 }
4059
4060 my $ivshmem_name = $ivshmem->{name} // $vmid;
4061 my $path = '/dev/shm/pve-shm-' . $ivshmem_name;
4062
4063 push @$devices, '-device', "ivshmem-plain,memdev=ivshmem$bus,";
4064 push @$devices, '-object', "memory-backend-file,id=ivshmem,share=on,mem-path=$path"
4065 .",size=$ivshmem->{size}M";
4066 }
4067
4068 # pci.4 is nested in pci.1
4069 $bridges->{1} = 1 if $bridges->{4};
4070
4071 if (!$q35) { # add pci bridges
4072 if (min_version($machine_version, 2, 3)) {
4073 $bridges->{1} = 1;
4074 $bridges->{2} = 1;
4075 }
4076 $bridges->{3} = 1 if $scsihw =~ m/^virtio-scsi-single/;
4077 }
4078
4079 for my $k (sort {$b cmp $a} keys %$bridges) {
4080 next if $q35 && $k < 4; # q35.cfg already includes bridges up to 3
4081
4082 my $k_name = $k;
4083 if ($k == 2 && $legacy_igd) {
4084 $k_name = "$k-igd";
4085 }
4086 my $pciaddr = print_pci_addr("pci.$k_name", undef, $arch, $machine_type);
4087 my $devstr = "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr";
4088
4089 if ($q35) { # add after -readconfig pve-q35.cfg
4090 splice @$devices, 2, 0, '-device', $devstr;
4091 } else {
4092 unshift @$devices, '-device', $devstr if $k > 0;
4093 }
4094 }
4095
4096 if (!$kvm) {
4097 push @$machineFlags, 'accel=tcg';
4098 }
4099
4100 push @$machineFlags, 'smm=off' if should_disable_smm($conf, $vga);
4101
4102 my $machine_type_min = $machine_type;
4103 if ($add_pve_version) {
4104 $machine_type_min =~ s/\+pve\d+$//;
4105 $machine_type_min .= "+pve$required_pve_version";
4106 }
4107 push @$machineFlags, "type=${machine_type_min}";
4108
4109 push @$cmd, @$devices;
4110 push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
4111 push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
4112 push @$cmd, '-global', join(',', @$globalFlags) if scalar(@$globalFlags);
4113
4114 if (my $vmstate = $conf->{vmstate}) {
4115 my $statepath = PVE::Storage::path($storecfg, $vmstate);
4116 push @$vollist, $vmstate;
4117 push @$cmd, '-loadstate', $statepath;
4118 print "activating and using '$vmstate' as vmstate\n";
4119 }
4120
4121 if (PVE::QemuConfig->is_template($conf)) {
4122 # needed to workaround base volumes being read-only
4123 push @$cmd, '-snapshot';
4124 }
4125
4126 # add custom args
4127 if ($conf->{args}) {
4128 my $aa = PVE::Tools::split_args($conf->{args});
4129 push @$cmd, @$aa;
4130 }
4131
4132 return wantarray ? ($cmd, $vollist, $spice_port) : $cmd;
4133 }
4134
# Sanity-check the host-side source file of a VirtIO RNG device.
#
# Dies if $source does not exist, or if $source is '/dev/hwrng' while the hw_random sysfs
# entry 'rng_current' reads 'none'. Callers should not rely on the return value.
sub check_rng_source {
    my ($source) = @_;

    # mostly relevant for /dev/hwrng, but doesn't hurt to check others too
    if (!-e $source) {
        die "cannot create VirtIO RNG device: source file '$source' doesn't exist\n";
    }

    # only the hardware RNG passthrough needs the additional sysfs check
    return if $source ne '/dev/hwrng';

    my $rng_current = '/sys/devices/virtual/misc/hw_random/rng_current';
    return if file_read_firstline($rng_current) ne 'none';

    # Needs to abort, otherwise QEMU crashes on first rng access. Note that rng_current cannot
    # be changed to 'none' manually, so once the VM is past this point, it's no longer an issue.
    die "Cannot start VM with passed-through RNG device: '/dev/hwrng' exists, but"
        ." '$rng_current' is set to 'none'. Ensure that a compatible hardware-RNG is attached"
        ." to the host.\n";
}
4151
# Ask the running VM for its SPICE port via 'query-spice'.
#
# Returns the 'tls-port' value if it is set, otherwise the plain 'port'; dies if neither is.
sub spice_port {
    my ($vmid) = @_;

    my $spice_info = mon_cmd($vmid, 'query-spice');

    my $port = $spice_info->{'tls-port'} || $spice_info->{'port'};
    die "no spice port\n" if !$port;

    return $port;
}
4159
# Collect the IDs of the devices currently present in the running VM.
#
# Queries the monitor for PCI devices (descending into PCI bridges), block devices, mice and
# the entries below '/machine/peripheral'. Returns a hash reference keyed by device ID with
# true values; for a PCI bridge the value is 1 plus the number of devices behind it.
sub vm_devices_list {
    my ($vmid) = @_;

    my $devices = {};

    my $res = mon_cmd($vmid, 'query-pci');
    my $devices_to_check = [];
    for my $pcibus (@$res) {
        push @$devices_to_check, @{ $pcibus->{devices} };
    }

    # walk the PCI topology level by level
    while (@$devices_to_check) {
        my $to_check = [];
        for my $d (@$devices_to_check) {
            my $id = $d->{'qdev_id'};
            $devices->{$id} = 1 if $id;

            # 'devices' is optional in the bridge info; dereferencing a missing list would
            # die under 'use strict', so treat such a bridge as having nothing behind it
            my $bridge = $d->{'pci_bridge'};
            next if !$bridge || !$bridge->{devices};

            # only count for bridges that have an ID, avoids using undef as a hash key
            $devices->{$id} += scalar(@{ $bridge->{devices} }) if $id;
            push @$to_check, @{ $bridge->{devices} };
        }
        $devices_to_check = $to_check;
    }

    my $resblock = mon_cmd($vmid, 'query-block');
    for my $block (@$resblock) {
        if ($block->{device} =~ m/^drive-(\S+)/) {
            $devices->{$1} = 1;
        }
    }

    my $resmice = mon_cmd($vmid, 'query-mice');
    for my $mice (@$resmice) {
        if ($mice->{name} eq 'QEMU HID Tablet') {
            $devices->{tablet} = 1;
            last;
        }
    }

    # for usb devices there is no query-usb
    # but we can iterate over the entries in
    # qom-list path=/machine/peripheral
    my $resperipheral = mon_cmd($vmid, 'qom-list', path => '/machine/peripheral');
    for my $per (@$resperipheral) {
        if ($per->{name} =~ m/^usb\d+$/) {
            $devices->{ $per->{name} } = 1;
        }
    }

    return $devices;
}
4209
# Hot-plug the device $deviceid into the running VM.
#
# Returns 1 if the device is already present or was plugged. Returns an empty value when
# adding the netdev for a 'netN' device reports failure. Dies for unsupported device IDs
# (including all 'usbN' devices) and re-throws errors from the individual plug steps.
sub vm_deviceplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    my $present = vm_devices_list($vmid);
    return 1 if defined($present->{$deviceid});

    # add PCI bridge if we need it for the device
    qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid, $arch, $machine_type);

    # shared rollback for the disk branches: drop the drive again, then re-throw
    my $rollback_drive = sub {
        my ($err) = @_;
        eval { qemu_drivedel($vmid, $deviceid); };
        warn $@ if $@;
        die $err;
    };

    if ($deviceid eq 'tablet') {
        qemu_deviceadd($vmid, print_tabletdevice_full($conf, $arch));
    } elsif ($deviceid eq 'keyboard') {
        qemu_deviceadd($vmid, print_keyboarddevice_full($conf, $arch));
    } elsif ($deviceid =~ m/^usb(\d+)$/) {
        die "usb hotplug currently not reliable\n";
        # since we can't reliably hot unplug all added usb devices and usb
        # passthrough breaks live migration we disable usb hotplugging for now
        #qemu_deviceadd($vmid, PVE::QemuServer::USB::print_usbdevice_full($conf, $deviceid, $device));
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        qemu_iothread_add($vmid, $deviceid, $device);
        qemu_driveadd($storecfg, $vmid, $device);

        my $devstr = print_drivedevice_full(
            $storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);
        qemu_deviceadd($vmid, $devstr);

        eval { qemu_deviceaddverify($vmid, $deviceid); };
        $rollback_drive->($@) if $@;
    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        my $scsihw = $conf->{scsihw} // 'lsi';
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $scsihw_type = $scsihw eq 'virtio-scsi-single' ? 'virtio-scsi-pci' : $scsihw;

        my $devstr = "$scsihw_type,id=$deviceid$pciaddr";

        if ($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{iothread}) {
            qemu_iothread_add($vmid, $deviceid, $device);
            $devstr .= ",iothread=iothread-$deviceid";
        }

        if ($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{queues}) {
            $devstr .= ",num_queues=$device->{queues}";
        }

        qemu_deviceadd($vmid, $devstr);
        qemu_deviceaddverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        qemu_findorcreatescsihw($storecfg, $conf, $vmid, $device, $arch, $machine_type);
        qemu_driveadd($storecfg, $vmid, $device);

        my $devstr = print_drivedevice_full(
            $storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);

        eval { qemu_deviceadd($vmid, $devstr); };
        $rollback_drive->($@) if $@;
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        return if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid);

        # deliberately shadows the parameter: NICs use the PXE-aware machine type
        my $machine_type = PVE::QemuServer::Machine::qemu_machine_pxe($vmid, $conf);
        my $use_old_bios_files = undef;
        ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);

        my $netdevstr = print_netdevice_full(
            $vmid, $conf, $device, $deviceid, undef, $use_old_bios_files, $arch, $machine_type);
        qemu_deviceadd($vmid, $netdevstr);

        eval {
            qemu_deviceaddverify($vmid, $deviceid);
            qemu_set_link_status($vmid, $deviceid, !$device->{link_down});
        };
        if (my $err = $@) {
            # remove the backend again if the frontend did not show up
            eval { qemu_netdevdel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif (!$q35 && $deviceid =~ m/^(pci\.)(\d+)$/) {
        my $bridgeid = $2;
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $devstr = "pci-bridge,id=pci.$bridgeid,chassis_nr=$bridgeid$pciaddr";

        qemu_deviceadd($vmid, $devstr);
        qemu_deviceaddverify($vmid, $deviceid);
    } else {
        die "can't hotplug device '$deviceid'\n";
    }

    return 1;
}
4304
# FIXME: this should raise exceptions on error!
#
# Hot-unplug the device $deviceid from the running VM.
#
# Returns 1 if the device is not present (anymore). Dies when asked to unplug a boot disk,
# any 'usbN' device, or a device ID that matches none of the handled patterns.
sub vm_deviceunplug {
    my ($vmid, $conf, $deviceid) = @_;

    my $present = vm_devices_list($vmid);
    return 1 if !defined($present->{$deviceid});

    my $bootdisks = PVE::QemuServer::Drive::get_bootdisks($conf);
    die "can't unplug bootdisk '$deviceid'\n" if grep { $_ eq $deviceid } @$bootdisks;

    if ($deviceid eq 'tablet' || $deviceid eq 'keyboard') {
        qemu_devicedel($vmid, $deviceid);
    } elsif ($deviceid =~ m/^usb\d+$/) {
        die "usb hotplug currently not reliable\n";
        # when unplugging usb devices this way, there may be remaining usb
        # controllers/hubs so we disable it for now
        #qemu_devicedel($vmid, $deviceid);
        #qemu_devicedelverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_iothread_del($vmid, $deviceid, $drive);
    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_deletescsihw($conf, $vmid, $deviceid);

        # with virtio-scsi-single the iothread is named after the per-disk controller
        if ($conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single')) {
            qemu_iothread_del($vmid, "virtioscsi$drive->{index}", $drive);
        }
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_netdevdel($vmid, $deviceid);
    } else {
        die "can't unplug device '$deviceid'\n";
    }

    return 1;
}
4353
# Issue a 'device_add' monitor command for a '-device' style string.
#
# $devicefull is "<driver>,key=value,..."; it is prefixed with 'driver=' and then split on
# '=' and ',' into the key/value pairs handed to the monitor.
sub qemu_deviceadd {
    my ($vmid, $devicefull) = @_;

    my %options = split(/[=,]/, "driver=" . $devicefull);

    return mon_cmd($vmid, "device_add", %options);
}
4362
# Issue a 'device_del' monitor command for $deviceid and return the monitor's result.
sub qemu_devicedel {
    my ($vmid, $deviceid) = @_;

    my $result = mon_cmd($vmid, "device_del", id => $deviceid);

    return $result;
}
4368
# Create the 'iothread-<deviceid>' object for $deviceid, if the drive has 'iothread' set and
# the object is not already listed by vm_iothreads_list().
sub qemu_iothread_add {
    my ($vmid, $deviceid, $device) = @_;

    return if !$device->{iothread};

    my $object_id = "iothread-$deviceid";
    my $existing = vm_iothreads_list($vmid);

    qemu_objectadd($vmid, $object_id, "iothread") if !$existing->{$object_id};
}
4377
# Remove the 'iothread-<deviceid>' object for $deviceid, if the drive has 'iothread' set and
# the object is listed by vm_iothreads_list().
sub qemu_iothread_del {
    my ($vmid, $deviceid, $device) = @_;

    return if !$device->{iothread};

    my $object_id = "iothread-$deviceid";
    my $existing = vm_iothreads_list($vmid);

    qemu_objectdel($vmid, $object_id) if $existing->{$object_id};
}
4386
# Issue an 'object-add' monitor command creating object $objectid of type $qomtype.
# Always returns 1; errors surface as exceptions from mon_cmd.
sub qemu_objectadd {
    my ($vmid, $objectid, $qomtype) = @_;

    my %params = (
        id => $objectid,
        "qom-type" => $qomtype,
    );
    mon_cmd($vmid, "object-add", %params);

    return 1;
}
4394
# Issue an 'object-del' monitor command for object $objectid.
# Always returns 1; errors surface as exceptions from mon_cmd.
sub qemu_objectdel {
    my ($vmid, $objectid) = @_;

    my %params = (id => $objectid);
    mon_cmd($vmid, "object-del", %params);

    return 1;
}
4402
# Add the backing drive for $device to the running VM via the HMP 'drive_add' command.
#
# Returns 1 when the monitor output contains "OK", dies with the monitor output otherwise.
sub qemu_driveadd {
    my ($storecfg, $vmid, $device) = @_;

    my $running_version = get_running_qemu_version($vmid);
    my $io_uring = min_version($running_version, 6, 0);

    my $drive = print_drive_commandline_full($storecfg, $vmid, $device, undef, $io_uring);
    # double every backslash, as the string gets embedded in a quoted HMP argument
    $drive =~ s/\\/\\\\/g;

    my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_add auto \"$drive\"");

    # If the command succeeds qemu prints: "OK"
    die "adding drive failed: $output\n" if $output !~ m/OK/s;

    return 1;
}
4417
# Remove the backing drive 'drive-<deviceid>' via the HMP 'drive_del' command.
#
# Returns 1 on empty monitor output or when the monitor reports the device as not found;
# dies with the monitor output otherwise.
sub qemu_drivedel {
    my ($vmid, $deviceid) = @_;

    my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_del drive-$deviceid");
    $output =~ s/^\s+//;

    my $deleted = $output eq "";

    # NB: device not found errors mean the drive was auto-deleted and we ignore the error
    my $already_gone = $output =~ m/Device \'.*?\' not found/s;

    return 1 if $deleted || $already_gone;

    die "deleting drive $deviceid failed : $output\n";
}
4431
# Poll the device list until $deviceid shows up.
#
# Checks up to six times, sleeping one second after each unsuccessful check. Returns 1 once
# the device is listed, dies if it never appears.
sub qemu_deviceaddverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if defined($present->{$deviceid});
        sleep 1;
    }

    die "error on hotplug device '$deviceid'\n";
}
4443
4444
# Poll the device list until $deviceid has disappeared.
#
# Checks up to six times, sleeping one second after each unsuccessful check. Returns 1 once
# the device is no longer listed, dies if it is still there after the last check.
sub qemu_devicedelverify {
    my ($vmid, $deviceid) = @_;

    # need to verify that the device is correctly removed as device_del
    # is async and empty return is not reliable
    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if !defined($present->{$deviceid});
        sleep 1;
    }

    die "error on hot-unplugging device '$deviceid'\n";
}
4459
# Make sure the SCSI controller that scsihw_infos() assigns to $device exists in the
# running VM, hot-plugging it when it is not in the device list yet. Always returns 1.
sub qemu_findorcreatescsihw {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my (undef, $controller, $controller_prefix) = scsihw_infos($conf, $device);
    my $scsihwid = "$controller_prefix$controller";

    my $present = vm_devices_list($vmid);
    return 1 if defined($present->{$scsihwid});

    vm_deviceplug($storecfg, $conf, $vmid, $scsihwid, $device, $arch, $machine_type);

    return 1;
}
4474
# Unplug the SCSI controller belonging to the drive configured as $opt.
#
# With 'virtio-scsi-single' the controller 'virtioscsi<index>' is unplugged right away.
# Otherwise the controller 'scsihw<N>' is kept as long as the device list still contains a
# SCSI drive whose index is below the threshold computed from scsihw_infos(), and is
# unplugged if no such drive is found. Always returns 1.
sub qemu_deletescsihw {
    my ($conf, $vmid, $opt) = @_;

    my $device = parse_drive($opt, $conf->{$opt});

    my $single_controller = $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
    if ($single_controller) {
        vm_deviceunplug($vmid, $conf, "virtioscsi$device->{index}");
        return 1;
    }

    my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $device);

    my $present = vm_devices_list($vmid);
    for my $name (keys %{$present}) {
        next if !is_valid_drivename($name);

        my $drive = parse_drive($name, $conf->{$name});
        my $on_scsi = $drive->{interface} eq 'scsi';

        # controller still in use by another drive, leave it alone
        return 1 if $on_scsi && $drive->{index} < (($maxdev-1)*($controller+1));
    }

    vm_deviceunplug($vmid, $conf, "scsihw$controller");

    return 1;
}
4503
# Hot-plug the PCI bridge that the device ID $device needs, if it is not present yet.
#
# print_pci_addr() records the bus used by $device in a scratch hash; bus numbers below 1
# need no extra bridge. For any other bus the bridge 'pci.<bus>' is plugged unless the
# device list already contains it. Always returns 1.
sub qemu_add_pci_bridge {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my $bridges = {};
    print_pci_addr($device, $bridges, $arch, $machine_type);

    # take the last recorded bus, same as iterating the hash with each()
    my $bridgeid;
    for my $k (keys %$bridges) {
        $bridgeid = $k;
    }
    return 1 if !defined($bridgeid) || $bridgeid < 1;

    my $bridge = "pci.$bridgeid";
    my $devices_list = vm_devices_list($vmid);

    if (!defined($devices_list->{$bridge})) {
        # vm_deviceplug takes ($storecfg, $conf, $vmid, $deviceid, $device, $arch,
        # $machine_type); a bridge has no device description, so pass undef explicitly to
        # keep $arch and $machine_type in their proper positions.
        vm_deviceplug($storecfg, $conf, $vmid, $bridge, undef, $arch, $machine_type);
    }

    return 1;
}
4527
# Set the link state of network device $device via the 'set_link' monitor command.
# A true $up is sent as JSON true, anything else as JSON false.
sub qemu_set_link_status {
    my ($vmid, $device, $up) = @_;

    my $state = $up ? JSON::true : JSON::false;

    return mon_cmd($vmid, "set_link", name => $device, up => $state);
}
4534
# Add the network backend for $deviceid via the 'netdev_add' monitor command.
#
# The '-netdev' style string from print_netdev_full() is split on '=' and ',' into
# key/value pairs; 'vhost' is converted to a JSON boolean and 'queues' to a number before
# they are sent. Always returns 1.
sub qemu_netdevadd {
    my ($vmid, $conf, $arch, $device, $deviceid) = @_;

    my $netdev = print_netdev_full($vmid, $conf, $arch, $device, $deviceid, 1);
    my %options = split(/[=,]/, $netdev);

    # the monitor expects typed values, not the strings from the command line syntax
    if (defined($options{vhost})) {
        my $enabled = PVE::JSONSchema::parse_boolean($options{vhost});
        $options{vhost} = JSON::boolean($enabled);
    }
    if (defined($options{queues})) {
        $options{queues} = $options{queues} + 0;
    }

    mon_cmd($vmid, "netdev_add", %options);

    return 1;
}
4552
# Remove the network backend $deviceid via the 'netdev_del' monitor command.
sub qemu_netdevdel {
    my ($vmid, $deviceid) = @_;

    my %params = (id => $deviceid);

    return mon_cmd($vmid, "netdev_del", %params);
}
4558
# Replace the USB device $deviceid in the running VM with the one described by $device.
#
# Does nothing if $device is false. Otherwise unplugs the old device, adds an 'xhci'
# controller first if $device->{usb3} is set and no 'xhci' device is listed, and then plugs
# the device parsed from $device->{host}.
sub qemu_usb_hotplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    return if !$device;

    # remove the old one first
    vm_deviceunplug($vmid, $conf, $deviceid);

    # check if xhci controller is necessary and available
    if ($device->{usb3}) {
        my $present = vm_devices_list($vmid);

        if (!$present->{xhci}) {
            my $pciaddr = print_pci_addr("xhci", undef, $arch, $machine_type);
            qemu_deviceadd($vmid, "nec-usb-xhci,id=xhci$pciaddr");
        }
    }

    my $usb_device = parse_usb_device($device->{host});
    $usb_device->{usb3} = $device->{usb3};

    # add the new one
    vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $usb_device, $arch, $machine_type);
}
4583
# Change the number of vCPUs of the running VM to $vcpus (or to the maximum if $vcpus is
# false).
#
# The maximum is sockets * cores from $conf. When reducing, 'cpu<N>' devices are removed
# one by one; when increasing, CPU devices are added one by one (or 'cpu-add' is used for
# machine versions older than 2.7). With the device based paths $conf->{vcpus} is updated
# and the config written after every single CPU. Dies or raises a parameter exception for
# 'vcpus' when a step fails.
sub qemu_cpu_hotplug {
    my ($vmid, $conf, $vcpus) = @_;

    my $machine_type = PVE::QemuServer::Machine::get_current_qemu_machine($vmid);

    # 'smp' is the old style socket count (no longer used), 'sockets' takes precedence
    my $sockets = $conf->{sockets} || $conf->{smp} || 1;
    my $cores = $conf->{cores} || 1;
    my $maxcpus = $sockets * $cores;

    $vcpus ||= $maxcpus;

    die "you can't add more vcpus than maxcpus\n" if $vcpus > $maxcpus;

    my $configured = $conf->{vcpus} || $maxcpus;

    if ($vcpus < $configured) {
        die "cpu hot-unplugging requires qemu version 2.7 or higher\n"
            if !PVE::QemuServer::Machine::machine_version($machine_type, 2, 7);

        # remove the highest numbered CPU first
        for my $i (reverse(($vcpus + 1) .. $configured)) {
            qemu_devicedel($vmid, "cpu$i");

            my $running = undef;
            my $retry = 0;
            while (1) {
                $running = mon_cmd($vmid, "query-cpus-fast");
                last if scalar(@{$running}) == $i-1;
                raise_param_exc({ vcpus => "error unplugging cpu$i" }) if $retry > 5;
                $retry++;
                sleep 1;
            }

            # update conf after each successful cpu unplug
            $conf->{vcpus} = scalar(@{$running});
            PVE::QemuConfig->write_config($vmid, $conf);
        }

        return;
    }

    my $running_before = mon_cmd($vmid, "query-cpus-fast");
    die "vcpus in running vm does not match its configuration\n"
        if scalar(@{$running_before}) != $configured;

    if (!PVE::QemuServer::Machine::machine_version($machine_type, 2, 7)) {
        # legacy interface for old machine versions, no verification possible here
        for my $i ($configured .. ($vcpus - 1)) {
            mon_cmd($vmid, "cpu-add", id => int($i));
        }
        return;
    }

    for my $i (($configured + 1) .. $vcpus) {
        my $cpustr = print_cpu_device($conf, $i);
        qemu_deviceadd($vmid, $cpustr);

        my $running = undef;
        my $retry = 0;
        while (1) {
            $running = mon_cmd($vmid, "query-cpus-fast");
            last if scalar(@{$running}) == $i;
            raise_param_exc({ vcpus => "error hotplugging cpu$i" }) if $retry > 10;
            sleep 1;
            $retry++;
        }

        # update conf after each successful cpu hotplug
        $conf->{vcpus} = scalar(@{$running});
        PVE::QemuConfig->write_config($vmid, $conf);
    }
}
4658
# Apply I/O throttling limits to block device $deviceid of a running VM.
#
# After $vmid and $deviceid, the 18 limits are expected positionally in the order given by
# @limit_names below. Each one is passed through int() and sent with the
# 'block_set_io_throttle' monitor command. Does nothing if the VM is not running.
sub qemu_block_set_io_throttle {
    my ($vmid, $deviceid, @limits) = @_;

    return if !check_running($vmid);

    my @limit_names = qw(
        bps bps_rd bps_wr iops iops_rd iops_wr
        bps_max bps_rd_max bps_wr_max iops_max iops_rd_max iops_wr_max
        bps_max_length bps_rd_max_length bps_wr_max_length
        iops_max_length iops_rd_max_length iops_wr_max_length
    );

    # pair every name with its positional value, keeping the original order
    my @throttle_args = map { $limit_names[$_] => int($limits[$_]) } 0 .. $#limit_names;

    return mon_cmd($vmid, "block_set_io_throttle", device => $deviceid, @throttle_args);
}
4690
# Resize volume $volid through the storage layer and, if the VM is running, tell QEMU the
# new size of block device $deviceid.
#
# When PVE::Storage::volume_resize() returns a false value, a size of 0 is passed on to
# QEMU. The size sent with 'block_resize' is rounded up to a multiple of 1024.
sub qemu_block_resize {
    my ($vmid, $deviceid, $storecfg, $volid, $size) = @_;

    my $running = check_running($vmid);

    my $resized = PVE::Storage::volume_resize($storecfg, $volid, $size, $running);
    $size = 0 if !$resized;

    return if !$running;

    # round up to the next multiple of 1024
    my $remainder = $size % 1024;
    $size += (1024 - $remainder) % 1024;

    my %params = (
        device => $deviceid,
        size => int($size),
        timeout => 60,
    );

    return mon_cmd($vmid, "block_resize", %params);
}
4711
# Create snapshot $snap of volume $volid.
#
# Uses QEMU's 'blockdev-snapshot-internal-sync' when the VM is running and
# do_snapshots_with_qemu() says so, otherwise the storage layer takes the snapshot.
sub qemu_volume_snapshot {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $running = check_running($vmid);

    if (!$running || !do_snapshots_with_qemu($storecfg, $volid, $deviceid)) {
        return PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
    }

    return mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap);
}
4723
# Delete snapshot $snap of volume $volid.
#
# The volume only counts as in use if the VM is running and a volume in its current config
# has $volid as its file. In that case, and if do_snapshots_with_qemu() says so, QEMU
# deletes the snapshot; otherwise the storage layer does, and is told whether the volume is
# in use.
sub qemu_volume_snapshot_delete {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $in_use = check_running($vmid);

    if ($in_use) {
        # a running VM does not necessarily use this volume, check its config
        $in_use = undef;
        my $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->foreach_volume($conf, sub {
            my ($ds, $drive) = @_;
            $in_use = 1 if $drive->{file} eq $volid;
        });
    }

    if (!$in_use || !do_snapshots_with_qemu($storecfg, $volid, $deviceid)) {
        return PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $in_use);
    }

    return mon_cmd(
        $vmid, 'blockdev-snapshot-delete-internal-sync', device => $deviceid, name => $snap);
}
4745
# Set the migration capabilities of the running VM.
#
# Every capability reported by 'query-migrate-capabilities' is explicitly switched on or
# off: 'auto-converge' and 'xbzrle' on, 'dirty-bitmaps' depending on what
# 'query-proxmox-support' reports for the savevm ($savevm true) or migration case, all
# others off.
sub set_migration_caps {
    my ($vmid, $savevm) = @_;

    # errors are ignored on purpose, the result is simply treated as "not supported"
    my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };

    my $bitmap_prop = $savevm ? 'pbs-dirty-bitmap-savevm' : 'pbs-dirty-bitmap-migration';
    my $dirty_bitmaps = $qemu_support->{$bitmap_prop} ? 1 : 0;

    my $enabled_cap = {
        "auto-converge" => 1,
        "xbzrle" => 1,
        "x-rdma-pin-all" => 0,
        "zero-blocks" => 0,
        "compress" => 0,
        "dirty-bitmaps" => $dirty_bitmaps,
    };

    my $supported_capabilities = mon_cmd($vmid, "query-migrate-capabilities");

    my $cap_ref = [
        map {
            my $name = $_->{capability};
            +{
                capability => $name,
                state => $enabled_cap->{$name} ? JSON::true : JSON::false,
            };
        } @$supported_capabilities
    ];

    return mon_cmd($vmid, "migrate-set-capabilities", capabilities => $cap_ref);
}
4776
# Call $func once for every distinct volume ID referenced by $conf or one of its snapshots.
#
# $func receives ($volid, $attributes, @param), where $attributes is a hash with the keys
# cdrom, replicate, shared, referenced_in_config, referenced_in_snapshot, size, is_vmstate,
# is_tpmstate, is_unused and drivename, accumulated over all references to that volume
# (some keys are only set when applicable).
sub foreach_volid {
    my ($conf, $func, @param) = @_;

    my $volhash = {};

    my $record_volume = sub {
        my ($key, $drive, $snapname) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $info = $volhash->{$volid} //= {};

        # only a cdrom if every reference is a cdrom
        $info->{cdrom} //= 1;
        $info->{cdrom} = 0 if !drive_is_cdrom($drive);

        # replicated if at least one reference does not opt out
        my $replicate = $drive->{replicate} // 1;
        $info->{replicate} //= 0;
        $info->{replicate} = 1 if $replicate;

        $info->{shared} //= 0;
        $info->{shared} = 1 if $drive->{shared};

        $info->{referenced_in_config} //= 0;
        if (defined($snapname)) {
            $info->{referenced_in_snapshot}->{$snapname} = 1;
        } else {
            $info->{referenced_in_config} = 1;
        }

        # first size seen wins
        my $size = $drive->{size};
        $info->{size} //= $size if $size;

        $info->{is_vmstate} //= 0;
        $info->{is_vmstate} = 1 if $key eq 'vmstate';

        $info->{is_tpmstate} //= 0;
        $info->{is_tpmstate} = 1 if $key eq 'tpmstate0';

        $info->{is_unused} //= 0;
        $info->{is_unused} = 1 if $key =~ /^unused\d+$/;

        $info->{drivename} = $key if is_valid_drivename($key);
    };

    my $include_opts = {
        extra_keys => ['vmstate'],
        include_unused => 1,
    };

    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $record_volume);
    for my $snapname (keys %{$conf->{snapshots}}) {
        my $snap = $conf->{snapshots}->{$snapname};
        PVE::QemuConfig->foreach_volume_full($snap, $include_opts, $record_volume, $snapname);
    }

    for my $volid (keys %$volhash) {
        $func->($volid, $volhash->{$volid}, @param);
    }
}
4834
# Options that vmconfig_hotplug_pending() copies from the pending section straight into
# the config, without any hotplug action.
my $fast_plug_option = {
    map { $_ => 1 } qw(
        lock
        name
        onboot
        shares
        startup
        description
        protection
        vmstatestorage
        hookscript
        tags
    ),
};

# all cloudinit options are treated the same way
$fast_plug_option->{$_} = 1 for keys %$confdesc_cloudinit;
4851
# hotplug changes in [PENDING]
# $selection hash can be used to only apply specified options, for
# example: { cores => 1 } (only apply changed 'cores')
# $errors ref is used to return error messages
#
# Works in three steps: first the "fast plug" options are copied from pending into the
# config, then the pending deletions are applied, then the pending changes. An option
# whose handler dies with "skip\n" stays pending without an error; any other failure is
# recorded in $errors->{$opt} and the option stays pending as well. The config is written
# after the first step (if something changed) and after the last one.
sub vmconfig_hotplug_pending {
    my ($vmid, $conf, $storecfg, $selection, $errors) = @_;

    my $defaults = load_defaults();
    my $arch = get_vm_arch($conf);
    my $machine_type = get_vm_machine($conf, undef, $arch);

    # commit values which do not have any impact on running VM first
    # Note: those options cannot raise errors, so we do not care about
    # $selection and always apply them.

    my $add_error = sub {
        my ($opt, $msg) = @_;
        $errors->{$opt} = "hotplug problem - $msg";
    };

    my $changes = 0;
    foreach my $opt (keys %{$conf->{pending}}) { # add/change
        if ($fast_plug_option->{$opt}) {
            $conf->{$opt} = $conf->{pending}->{$opt};
            delete $conf->{pending}->{$opt};
            $changes = 1;
        }
    }

    if ($changes) {
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');

    my $cgroup = PVE::QemuServer::CGroup->new($vmid);
    my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    foreach my $opt (sort keys %$pending_delete_hash) {
        next if $selection && !$selection->{$opt};
        my $force = $pending_delete_hash->{$opt}->{force};
        eval {
            if ($opt eq 'hotplug') {
                die "skip\n" if ($conf->{hotplug} =~ /memory/);
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                # vm_deviceplug expects a device description before $arch; tablet and
                # keyboard have none, so pass undef to keep the arguments aligned
                if ($defaults->{tablet}) {
                    vm_deviceplug($storecfg, $conf, $vmid, 'tablet', undef, $arch, $machine_type);
                    vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', undef, $arch, $machine_type)
                        if $arch eq 'aarch64';
                } else {
                    vm_deviceunplug($vmid, $conf, 'tablet');
                    vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
                }
            } elsif ($opt =~ m/^usb\d+/) {
                die "skip\n";
                # since we cannot reliably hot unplug usb devices we are disabling it
                #die "skip\n" if !$hotplug_features->{usb} || $conf->{$opt} =~ m/spice/i;
                #vm_deviceunplug($vmid, $conf, $opt);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, undef);
            } elsif ($opt eq 'balloon') {
                # enabling the balloon device is not hotpluggable
                die "skip\n" if defined($conf->{balloon}) && $conf->{balloon} == 0;
                # here we reset the ballooning value to memory
                my $balloon = $conf->{memory} || $defaults->{memory};
                mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
            } elsif ($fast_plug_option->{$opt}) {
                # do nothing
            } elsif ($opt =~ m/^net(\d+)$/) {
                die "skip\n" if !$hotplug_features->{network};
                vm_deviceunplug($vmid, $conf, $opt);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if !$hotplug_features->{disk} || $opt =~ m/(ide|sata)(\d+)/;
                vm_deviceunplug($vmid, $conf, $opt);
                vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
            } elsif ($opt =~ m/^memory$/) {
                die "skip\n" if !$hotplug_features->{memory};
                PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt);
            } elsif ($opt eq 'cpuunits') {
                $cgroup->change_cpu_shares(undef);
            } elsif ($opt eq 'cpulimit') {
                $cgroup->change_cpu_quota(undef, undef); # reset, cgroup module can better decide values
            } else {
                die "skip\n";
            }
        };
        if (my $err = $@) {
            $add_error->($opt, $err) if $err ne "skip\n";
        } else {
            delete $conf->{$opt};
            PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        }
    }

    foreach my $opt (keys %{$conf->{pending}}) {
        next if $selection && !$selection->{$opt};
        my $value = $conf->{pending}->{$opt};
        eval {
            if ($opt eq 'hotplug') {
                die "skip\n" if ($value =~ /memory/) || ($value !~ /memory/ && $conf->{hotplug} =~ /memory/);
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                # see above: undef fills the unused device description slot
                if ($value == 1) {
                    vm_deviceplug($storecfg, $conf, $vmid, 'tablet', undef, $arch, $machine_type);
                    vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', undef, $arch, $machine_type)
                        if $arch eq 'aarch64';
                } elsif ($value == 0) {
                    vm_deviceunplug($vmid, $conf, 'tablet');
                    vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
                }
            } elsif ($opt =~ m/^usb\d+$/) {
                die "skip\n";
                # since we cannot reliably hot unplug usb devices we disable it for now
                #die "skip\n" if !$hotplug_features->{usb} || $value =~ m/spice/i;
                #my $d = eval { parse_property_string($usbdesc->{format}, $value) };
                #die "skip\n" if !$d;
                #qemu_usb_hotplug($storecfg, $conf, $vmid, $opt, $d, $arch, $machine_type);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, $value);
            } elsif ($opt eq 'balloon') {
                # enabling/disabling the ballooning device is not hotpluggable
                my $old_balloon_enabled = !!(!defined($conf->{balloon}) || $conf->{balloon});
                my $new_balloon_enabled = !!(!defined($conf->{pending}->{balloon}) || $conf->{pending}->{balloon});
                die "skip\n" if $old_balloon_enabled != $new_balloon_enabled;

                # allow manual ballooning if shares is set to zero
                if ((defined($conf->{shares}) && ($conf->{shares} == 0))) {
                    my $balloon = $conf->{pending}->{balloon} || $conf->{memory} || $defaults->{memory};
                    mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
                }
            } elsif ($opt =~ m/^net(\d+)$/) {
                # some changes can be done without hotplug
                vmconfig_update_net($storecfg, $conf, $hotplug_features->{network},
                    $vmid, $opt, $value, $arch, $machine_type);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if $opt eq 'efidisk0' || $opt eq 'tpmstate0';
                # some changes can be done without hotplug
                my $drive = parse_drive($opt, $value);
                if (drive_is_cloudinit($drive)) {
                    PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid);
                }
                vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
                    $vmid, $opt, $value, $arch, $machine_type);
            } elsif ($opt =~ m/^memory$/) { #dimms
                die "skip\n" if !$hotplug_features->{memory};
                $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt, $value);
            } elsif ($opt eq 'cpuunits') {
                my $new_cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{pending}->{$opt}); #clamp
                $cgroup->change_cpu_shares($new_cpuunits);
            } elsif ($opt eq 'cpulimit') {
                my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000);
                $cgroup->change_cpu_quota($cpulimit, 100000);
            } elsif ($opt eq 'agent') {
                vmconfig_update_agent($conf, $opt, $value);
            } else {
                die "skip\n"; # skip non-hot-pluggable options
            }
        };
        if (my $err = $@) {
            $add_error->($opt, $err) if $err ne "skip\n";
        } else {
            $conf->{$opt} = $value;
            delete $conf->{pending}->{$opt};
        }
    }
    PVE::QemuConfig->write_config($vmid, $conf);

    if ($hotplug_features->{cloudinit}) {
        my $pending = PVE::QemuServer::Cloudinit::get_pending_config($conf, $vmid);
        my $regenerate = undef;
        for my $item (@$pending) {
            $regenerate = 1 if defined($item->{delete}) or defined($item->{pending});
        }
        PVE::QemuServer::vmconfig_update_cloudinit_drive($storecfg, $conf, $vmid) if $regenerate;
    }
}
5030
# Free the volume behind a drive entry, if that is allowed.
#
# Nothing happens unless $force is set or $key names an 'unused' entry, and never for drives
# that drive_is_cdrom($drive, 1) reports as CD-ROM.
#
# Returns 1 when the entry can be dropped from the config (volume freed, or volume not owned by
# this VM) and nothing when the drive was left alone. Dies if the permission check fails or the
# volume is still in use.
sub try_deallocate_drive {
    my ($storecfg, $vmid, $conf, $key, $drive, $rpcenv, $authuser, $force) = @_;

    my $removable = $force || $key =~ /^unused/;
    return if !$removable || drive_is_cdrom($drive, 1);

    my $volid = $drive->{file};

    # If vm is not owner of this disk remove from config
    return 1 if !vm_is_volid_owner($storecfg, $vmid, $volid);

    my $sid = PVE::Storage::parse_volume_id($volid);
    $rpcenv->check($authuser, "/storage/$sid", ['Datastore.AllocateSpace']);

    # check if the disk is really unused
    if (PVE::QemuServer::Drive::is_volume_in_use($storecfg, $conf, $key, $volid)) {
        die "unable to delete '$volid' - volume is still in use (snapshot?)\n";
    }

    PVE::Storage::vdisk_free($storecfg, $volid);
    return 1;
}
5053
# Remove the drive configured as $opt from a guest.
#
# With $force the current user needs 'VM.Config.Disk' on the guest and the drive is handed to
# try_deallocate_drive(); without it the drive is passed to vmconfig_register_unused_drive().
sub vmconfig_delete_or_detach_drive {
    my ($vmid, $storecfg, $conf, $opt, $force) = @_;

    my $drive = parse_drive($opt, $conf->{$opt});

    my $rpcenv = PVE::RPCEnvironment::get();
    my $authuser = $rpcenv->get_user();

    return vmconfig_register_unused_drive($storecfg, $vmid, $conf, $drive) if !$force;

    $rpcenv->check_vm_perm($authuser, $vmid, undef, ['VM.Config.Disk']);
    return try_deallocate_drive($storecfg, $vmid, $conf, $opt, $drive, $rpcenv, $authuser, $force);
}
5069
5070
5071
# Apply all pending changes to the configuration of a guest ("cold plug").
#
# Pending deletions are processed first, then pending additions/changes; the config is written
# once at the end. A failure for a single option is stored in $errors (keyed by option name)
# and emitted as a warning, the remaining options are still processed.
sub vmconfig_apply_pending {
    my ($vmid, $conf, $storecfg, $errors) = @_;

    return if !scalar(keys %{$conf->{pending}});

    my $record_failure = sub {
        my ($opt, $msg) = @_;
        my $text = "unable to apply pending change $opt : $msg";
        $errors->{$opt} = $text;
        warn $text;
    };

    # cold plug

    my $to_delete = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    for my $opt (sort keys %$to_delete) {
        my $force = $to_delete->{$opt}->{force};
        eval {
            die "internal error" if $opt =~ m/^unused/;
            vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force)
                if defined($conf->{$opt}) && is_valid_drivename($opt);
        };
        if (my $err = $@) {
            $record_failure->($opt, $err);
            next;
        }
        PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        delete $conf->{$opt};
    }

    PVE::QemuConfig->cleanup_pending($conf);

    my $regenerate_cloudinit;

    for my $opt (keys %{$conf->{pending}}) { # add/change
        next if $opt eq 'delete'; # just to be sure

        my $is_drive = is_valid_drivename($opt);

        # a drive that gets replaced is handed to vmconfig_register_unused_drive() first
        eval {
            vmconfig_register_unused_drive($storecfg, $vmid, $conf, parse_drive($opt, $conf->{$opt}))
                if defined($conf->{$opt}) && $is_drive;
        };
        if (my $err = $@) {
            $record_failure->($opt, $err);
            next;
        }

        if ($is_drive) {
            my $new_drive = parse_drive($opt, $conf->{pending}->{$opt});
            $regenerate_cloudinit = 1 if drive_is_cloudinit($new_drive);
        }

        $conf->{$opt} = delete $conf->{pending}->{$opt};
    }

    # write all changes at once to avoid unnecessary i/o
    PVE::QemuConfig->write_config($vmid, $conf);
    PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid) if $regenerate_cloudinit;
}
5132
# Apply a new or changed 'netX' option to a guest.
#
# Changes of bridge, tag, trunks, firewall, rate and link state are applied to the existing
# device in place and the sub returns 1. A change of model, MAC address, queues or bridge/nat
# mode requires unplugging and re-plugging the device, which is only done when $hotplug is set;
# otherwise the sub dies with "skip\n".
sub vmconfig_update_net {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $newnet = parse_net($value);

    if ($conf->{$opt}) {
        my $oldnet = parse_net($conf->{$opt});

        my $needs_replug =
            safe_string_ne($oldnet->{model}, $newnet->{model})
            || safe_string_ne($oldnet->{macaddr}, $newnet->{macaddr})
            || safe_num_ne($oldnet->{queues}, $newnet->{queues})
            || !$newnet->{bridge}
            || !$oldnet->{bridge}; # bridge/nat mode change

        if (!$needs_replug) {
            my ($index) = $opt =~ m/net(\d+)/ or die "internal error";
            my $iface = "tap${vmid}i${index}";

            my $tap_settings_changed =
                safe_string_ne($oldnet->{bridge}, $newnet->{bridge})
                || safe_num_ne($oldnet->{tag}, $newnet->{tag})
                || safe_string_ne($oldnet->{trunks}, $newnet->{trunks})
                || safe_num_ne($oldnet->{firewall}, $newnet->{firewall});

            if ($tap_settings_changed) {
                PVE::Network::tap_unplug($iface);

                my @plug_args = ($iface, $newnet->@{qw(bridge tag firewall trunks rate)});
                if ($have_sdn) {
                    PVE::Network::SDN::Zones::tap_plug(@plug_args);
                } else {
                    PVE::Network::tap_plug(@plug_args);
                }
            } elsif (safe_num_ne($oldnet->{rate}, $newnet->{rate})) {
                # Rate can be applied on its own but any change above needs to
                # include the rate in tap_plug since OVS resets everything.
                PVE::Network::tap_rate_limit($iface, $newnet->{rate});
            }

            qemu_set_link_status($vmid, $opt, !$newnet->{link_down})
                if safe_string_ne($oldnet->{link_down}, $newnet->{link_down});

            return 1;
        }

        # for non online change, we try to hot-unplug
        die "skip\n" if !$hotplug;
        vm_deviceunplug($vmid, $conf, $opt);
    }

    die "skip\n" if !$hotplug;
    vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet, $arch, $machine_type);
}
5185
# Decide whether a pending 'agent' change can be accepted for a running guest.
#
# Dies with "skip\n" if no agent option is configured yet, or if any sub-option other than
# the ones listed in $hotpluggable differs between the current and the new value. Returns
# nothing otherwise.
sub vmconfig_update_agent {
    my ($conf, $opt, $value) = @_;

    die "skip\n" if !$conf->{$opt};

    my $hotpluggable = { fstrim_cloned_disks => 1 };

    my $current = parse_guest_agent($conf);
    my $wanted = parse_guest_agent({$opt => $value});

    # first pass catches added/changed options, second pass catches removed ones
    for my $direction ([$wanted, $current], [$current, $wanted]) {
        my ($from, $to) = @$direction;
        for my $name (keys %$from) {
            next if defined($hotpluggable->{$name});
            die "skip\n" if safe_string_ne($from->{$name}, $to->{$name});
        }
    }

    return; # either no actual change (e.g., format string reordered) or just hotpluggable changes
}
5208
# Apply a new or changed drive option to a guest.
#
# For an already configured drive:
#  - CD-ROM: the medium is ejected and, if the new value names one, replaced. Returns 1.
#  - disk, same volume: only I/O throttle changes are applied. Returns 1. Dies with "skip\n"
#    when discard, iothread, queues, cache or ssd differ.
#  - disk, different volume: the old disk is unplugged and registered as unused, then the new
#    one is plugged (needs $hotplug).
# A drive without previous config is plugged if $hotplug is set and the bus is not ide/sata.
# Dies with "skip\n" whenever the change cannot be applied this way.
sub vmconfig_update_disk {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $drive = parse_drive($opt, $value);

    if ($conf->{$opt} && (my $old_drive = parse_drive($opt, $conf->{$opt}))) {
        my $new_media = $drive->{media} || 'disk';
        my $old_media = $old_drive->{media} || 'disk';
        die "unable to change media type\n" if $new_media ne $old_media;

        if (drive_is_cdrom($old_drive)) {
            my $remove_medium = $drive->{file} eq 'none';
            my $path = $remove_medium ? undef : get_iso_path($storecfg, $vmid, $drive->{file});

            # force eject if locked
            mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");

            if ($remove_medium) {
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive)
                    if drive_is_cloudinit($old_drive);
            } elsif ($path) {
                mon_cmd($vmid, "blockdev-change-medium", id => "$opt", filename => "$path");
            }

            return 1;
        }

        if ($drive->{file} eq $old_drive->{file}) {
            # update existing disk

            # skip non hotpluggable value
            for my $key (qw(discard iothread queues cache ssd)) {
                die "skip\n" if safe_string_ne($drive->{$key}, $old_drive->{$key});
            }

            my @rate_keys = qw(mbps mbps_rd mbps_wr);
            my @iops_keys = qw(iops iops_rd iops_wr);
            my @rate_max_keys = qw(mbps_max mbps_rd_max mbps_wr_max);
            my @iops_max_keys = qw(iops_max iops_rd_max iops_wr_max);
            my @length_keys = qw(
                bps_max_length bps_rd_max_length bps_wr_max_length
                iops_max_length iops_rd_max_length iops_wr_max_length
            );

            my $throttle_changed = 0;
            for my $key (@rate_keys, @iops_keys, @rate_max_keys, @iops_max_keys, @length_keys) {
                next if !safe_num_ne($drive->{$key}, $old_drive->{$key});
                $throttle_changed = 1;
                last;
            }

            # apply throttle
            if ($throttle_changed) {
                qemu_block_set_io_throttle(
                    $vmid,
                    "drive-$opt",
                    (map { ($drive->{$_} || 0) * 1024 * 1024 } @rate_keys),
                    (map { $drive->{$_} || 0 } @iops_keys),
                    (map { ($drive->{$_} || 0) * 1024 * 1024 } @rate_max_keys),
                    (map { $drive->{$_} || 0 } @iops_max_keys),
                    (map { $drive->{$_} || 1 } @length_keys),
                );
            }

            return 1;
        }

        die "skip\n" if !$hotplug;

        # unplug and register as unused
        vm_deviceunplug($vmid, $conf, $opt);
        vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
    }

    die "skip\n" if !$hotplug || $opt =~ m/(ide|sata)(\d+)/;

    # hotplug new disks
    PVE::Storage::activate_volumes($storecfg, [$drive->{file}]) if $drive->{file} !~ m|^/dev/.+|;
    vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive, $arch, $machine_type);
}
5316
# Regenerate the cloud-init config of a guest; if the guest is running, eject the cloud-init
# drive's medium and re-insert the regenerated image via the monitor.
#
# Does nothing when the config has no cloud-init drive.
sub vmconfig_update_cloudinit_drive {
    my ($storecfg, $conf, $vmid) = @_;

    my ($ci_key, $ci_drive);
    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;
        ($ci_key, $ci_drive) = ($ds, $drive) if PVE::QemuServer::drive_is_cloudinit($drive);
    });

    return if !$ci_drive;

    PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid);

    if (PVE::QemuServer::check_running($vmid)) {
        my $path = PVE::Storage::path($storecfg, $ci_drive->{file});
        if ($path) {
            mon_cmd($vmid, "eject", force => JSON::true, id => "$ci_key");
            mon_cmd($vmid, "blockdev-change-medium", id => "$ci_key", filename => "$path");
        }
    }
}
5344
# Collect the volumes of a guest that live on non-shared storage.
# Called in locked context by incoming migration.
#
# CD-ROM drives, 'tpmstate0' and drives without a volume are left out.
#
# Returns a hash reference: drive key => [$volid, $storeid, $volname, $drive, $use_existing],
# with $use_existing being 1 for volumes listed in $replicated_volumes and 0 otherwise.
sub vm_migrate_get_nbd_disks {
    my ($storecfg, $conf, $replicated_volumes) = @_;

    my %local_volumes;

    my $collect = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);
        return if $ds eq 'tpmstate0';

        my $volid = $drive->{file} or return;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid);
        return if PVE::Storage::storage_config($storecfg, $storeid)->{shared};

        # replicated disks re-use existing state via bitmap
        my $use_existing = $replicated_volumes->{$volid} ? 1 : 0;
        $local_volumes{$ds} = [$volid, $storeid, $volname, $drive, $use_existing];
    };

    PVE::QemuConfig->foreach_volume($conf, $collect);

    return \%local_volumes;
}
5371
# Allocate the target volumes for an NBD storage migration.
# Called in locked context by incoming migration.
#
# $source_volumes maps drive key => [$volid, $storeid, $volname, $drive, $use_existing, $format]
# (the last element is optional). Volumes flagged $use_existing are not allocated, they are
# reported back as replicated. For all others a new volume with the size of $drive is
# allocated on the (possibly mapped) storage.
#
# Returns a hash reference: drive key => { drivestr, volid, [replicated] }.
sub vm_migrate_alloc_nbd_disks {
    my ($storecfg, $vmid, $source_volumes, $storagemap) = @_;

    my $nbd = {};
    foreach my $opt (sort keys %$source_volumes) {
        my ($volid, $storeid, $volname, $drive, $use_existing, $format) = @{$source_volumes->{$opt}};

        if ($use_existing) {
            $nbd->{$opt}->{drivestr} = print_drive($drive);
            $nbd->{$opt}->{volid} = $volid;
            $nbd->{$opt}->{replicated} = 1;
            next;
        }

        # storage mapping + volname = regular migration
        # storage mapping + format = remote migration
        # order of precedence, filtered by whether storage supports it:
        # 1. explicit requested format
        # 2. format of current volume
        # 3. default format of storage
        if (!$storagemap->{identity}) {
            $storeid = PVE::JSONSchema::map_id($storagemap, $storeid);
            my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
            if (!$format || !grep { $format eq $_ } @$validFormats) {
                # The requested format is missing or not supported by the target storage.
                # Forget it, so that an unsupported value cannot survive the '//=' below
                # when the current volume's format is no option either.
                $format = undef;
                if ($volname) {
                    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
                    my $fileFormat = qemu_img_format($scfg, $volname);
                    $format = $fileFormat
                        if grep { $fileFormat eq $_ } @$validFormats;
                }
                $format //= $defFormat;
            }
        } else {
            # can't happen for remote migration, so $volname is always defined
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $format = qemu_img_format($scfg, $volname);
        }

        my $size = $drive->{size} / 1024;
        my $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $format, undef, $size);

        # shallow copy, so the drive hash passed in by the caller is not modified
        my $newdrive = { %$drive };
        $newdrive->{format} = $format;
        $newdrive->{file} = $newvolid;
        my $drivestr = print_drive($newdrive);
        $nbd->{$opt}->{drivestr} = $drivestr;
        $nbd->{$opt}->{volid} = $newvolid;
    }

    return $nbd;
}
5423
# Start a guest while holding its config lock.
#
# see vm_start_nolock for parameters, additionally:
# migrate_opts:
#   storagemap = parsed storage map for allocating NBD disks
#
# Returns whatever the locked code returns: the result of vm_resume() when a backup holds the
# lock and the guest is already running, the result of vm_start_nolock() otherwise.
sub vm_start {
    my ($storecfg, $vmid, $params, $migrate_opts) = @_;

    my $start_locked = sub {
        my $source_node = $migrate_opts->{migratedfrom};
        my $conf = PVE::QemuConfig->load_config($vmid, $source_node);

        if (!$params->{skiptemplate} && PVE::QemuConfig->is_template($conf)) {
            die "you can't start a vm if it's a template\n";
        }

        my $suspended = PVE::QemuConfig->has_lock($conf, 'suspended');
        my $backup_locked = PVE::QemuConfig->has_lock($conf, 'backup');

        my $running = check_running($vmid, undef, $source_node);

        # a backup is currently running, attempt to start the guest in the
        # existing QEMU instance
        return vm_resume($vmid) if $backup_locked && $running;

        PVE::QemuConfig->check_lock($conf) if !$params->{skiplock} && !$suspended;

        $params->{resume} = $suspended || defined($conf->{vmstate});

        die "VM $vmid already running\n" if $running;

        if (my $storagemap = $migrate_opts->{storagemap}) {
            my $disks = vm_migrate_get_nbd_disks($storecfg, $conf, $migrate_opts->{replicated_volumes});
            my $nbd = vm_migrate_alloc_nbd_disks($storecfg, $vmid, $disks, $storagemap);
            $migrate_opts->{nbd} = $nbd;

            # the guest has to be started with the freshly allocated volumes
            $conf->{$_} = $nbd->{$_}->{drivestr} for keys %$nbd;
        }

        return vm_start_nolock($storecfg, $vmid, $conf, $params, $migrate_opts);
    };

    return PVE::QemuConfig->lock_config($vmid, $start_locked);
}
5467
5468
# Start a guest from an already loaded config; the caller is responsible for locking.
#
# Applies pending changes (unless a statefile is used), builds the QEMU command line, reserves
# and prepares host PCI devices, activates volumes, runs QEMU inside a systemd scope and then
# performs the post-start monitor setup (NBD exports, migration capabilities, SPICE ticket,
# balloon, link state).
#
# Returns a hash reference with the keys migrate_uri and, depending on the options,
# migrate_storage_uri, drives and spice_port. Dies with "start failed: ..." if QEMU could not
# be started; volumes and PCI devices are cleaned up in that case.
#
# params:
#   statefile => 'tcp', 'unix' for migration or path/volid for RAM state
#   skiplock => 0/1, skip checking for config lock
#   skiptemplate => 0/1, skip checking whether VM is template
#   forcemachine => to force Qemu machine (rollback/migration)
#   forcecpu => a QEMU '-cpu' argument string to override get_cpu_options
#   timeout => in seconds
#   paused => start VM in paused state (backup)
#   resume => resume from hibernation
#   pbs-backing => {
#       sata0 => {
#           repository
#           snapshot
#           keyfile
#           archive
#       },
#       virtio2 => ...
#   }
# migrate_opts:
#   nbd => volumes for NBD exports (vm_migrate_alloc_nbd_disks)
#   migratedfrom => source node
#   spice_ticket => used for spice migration, passed via tunnel/stdin
#   network => CIDR of migration network
#   type => secure/insecure - tunnel over encrypted connection or plain-text
#   nbd_proto_version => int, 0 for TCP, 1 for UNIX
#   replicated_volumes => which volids should be re-used with bitmaps for nbd migration
#   offline_volumes => new volids of offline migrated disks like tpmstate and cloudinit, not yet
#       contained in config
sub vm_start_nolock {
    my ($storecfg, $vmid, $conf, $params, $migrate_opts) = @_;

    my $statefile = $params->{statefile};
    my $resume = $params->{resume};

    my $migratedfrom = $migrate_opts->{migratedfrom};
    my $migration_type = $migrate_opts->{type};

    my $res = {};

    # clean up leftover reboot request files
    eval { clear_reboot_request($vmid); };
    warn $@ if $@;

    if (!$statefile && scalar(keys %{$conf->{pending}})) {
        vmconfig_apply_pending($vmid, $conf, $storecfg);
        $conf = PVE::QemuConfig->load_config($vmid); # update/reload
    }

    # don't regenerate the ISO if the VM is started as part of a live migration
    # this way we can reuse the old ISO with the correct config
    PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid) if !$migratedfrom;

    # override offline migrated volumes, conf is out of date still
    if (my $offline_volumes = $migrate_opts->{offline_volumes}) {
        for my $key (sort keys $offline_volumes->%*) {
            my $parsed = parse_drive($key, $conf->{$key});
            $parsed->{file} = $offline_volumes->{$key};
            $conf->{$key} = print_drive($parsed);
        }
    }

    my $defaults = load_defaults();

    # set environment variable useful inside network script
    $ENV{PVE_MIGRATED_FROM} = $migratedfrom if $migratedfrom;

    PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1);

    my $forcemachine = $params->{forcemachine};
    my $forcecpu = $params->{forcecpu};
    if ($resume) {
        # enforce machine and CPU type on suspended vm to ensure HW compatibility
        $forcemachine = $conf->{runningmachine};
        $forcecpu = $conf->{runningcpu};
        print "Resuming suspended VM\n";
    }

    my ($cmd, $vollist, $spice_port) = config_to_command($storecfg, $vmid,
        $conf, $defaults, $forcemachine, $forcecpu, $params->{'pbs-backing'});

    # The migration IP is looked up lazily and cached: explicit migration network first, then
    # the one from datacenter.cfg, finally the node's cluster IP.
    my $migration_ip;
    my $get_migration_ip = sub {
        my ($nodename) = @_;

        return $migration_ip if defined($migration_ip);

        my $cidr = $migrate_opts->{network};

        if (!defined($cidr)) {
            my $dc_conf = PVE::Cluster::cfs_read_file('datacenter.cfg');
            $cidr = $dc_conf->{migration}->{network};
        }

        if (defined($cidr)) {
            my $ips = PVE::Network::get_local_ip_from_cidr($cidr);

            die "could not get IP: no address configured on local " .
                "node for network '$cidr'\n" if scalar(@$ips) == 0;

            die "could not get IP: multiple addresses configured on local " .
                "node for network '$cidr'\n" if scalar(@$ips) > 1;

            $migration_ip = $ips->[0];
        }

        $migration_ip = PVE::Cluster::remote_node_ip($nodename, 1)
            if !defined($migration_ip);

        return $migration_ip;
    };

    my $migrate_uri;
    if ($statefile) {
        if ($statefile eq 'tcp') {
            my $localip = "localhost";
            my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg');
            my $nodename = nodename();

            if (!defined($migration_type)) {
                if (defined($datacenterconf->{migration}->{type})) {
                    $migration_type = $datacenterconf->{migration}->{type};
                } else {
                    $migration_type = 'secure';
                }
            }

            if ($migration_type eq 'insecure') {
                $localip = $get_migration_ip->($nodename);
                $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
            }

            my $pfamily = PVE::Tools::get_host_address_family($nodename);
            my $migrate_port = PVE::Tools::next_migrate_port($pfamily);
            $migrate_uri = "tcp:${localip}:${migrate_port}";
            push @$cmd, '-incoming', $migrate_uri;
            push @$cmd, '-S';

        } elsif ($statefile eq 'unix') {
            # should be the default for secure migrations, as an SSH TCP forward tunnel is not
            # reliably ready in time and regularly fails to set up, so use UNIX socket forwards
            my $socket_addr = "/run/qemu-server/$vmid.migrate";
            unlink $socket_addr;

            $migrate_uri = "unix:$socket_addr";

            push @$cmd, '-incoming', $migrate_uri;
            push @$cmd, '-S';

        } elsif (-e $statefile) {
            push @$cmd, '-loadstate', $statefile;
        } else {
            my $statepath = PVE::Storage::path($storecfg, $statefile);
            push @$vollist, $statefile;
            push @$cmd, '-loadstate', $statepath;
        }
    } elsif ($params->{paused}) {
        push @$cmd, '-S';
    }

    my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume);

    my $pci_devices = {}; # host pci devices
    for (my $i = 0; $i < $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES; $i++) {
        my $dev = $conf->{"hostpci$i"} or next;
        $pci_devices->{$i} = parse_hostpci($dev);
    }

    # do not reserve pciid for mediated devices, sysfs will error out for duplicate assignment
    my $real_pci_devices = [ grep { !(defined($_->{mdev}) && scalar($_->{pciid}->@*) == 1) } values $pci_devices->%* ];

    # map to a flat list of pci ids
    my $pci_id_list = [ map { $_->{id} } map { $_->{pciid}->@* } $real_pci_devices->@* ];

    # reserve all PCI IDs before actually doing anything with them
    PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, $start_timeout);

    eval {
        my $uuid;
        for my $id (sort keys %$pci_devices) {
            my $d = $pci_devices->{$id};
            for my $dev ($d->{pciid}->@*) {
                my $info = PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $id, $d->{mdev});

                # nvidia grid needs the uuid of the mdev as qemu parameter
                if ($d->{mdev} && !defined($uuid) && $info->{vendor} eq '10de') {
                    $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $id);
                }
            }
        }
        push @$cmd, '-uuid', $uuid if defined($uuid);
    };
    if (my $err = $@) {
        eval { cleanup_pci_devices($vmid, $conf) };
        warn $@ if $@;
        die $err;
    }

    PVE::Storage::activate_volumes($storecfg, $vollist);

    eval {
        run_command(['/bin/systemctl', 'stop', "$vmid.scope"], outfunc => sub{}, errfunc => sub{});
    };
    # Issues with the above 'stop' not being fully completed are extremely rare, a very low
    # timeout should be more than enough here...
    PVE::Systemd::wait_for_unit_removed("$vmid.scope", 20);

    my $cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{cpuunits});

    my %run_params = (
        timeout => $statefile ? undef : $start_timeout,
        umask => 0077,
        noerr => 1,
    );

    # when migrating, prefix QEMU output so other side can pick up any
    # errors that might occur and show the user
    if ($migratedfrom) {
        $run_params{quiet} = 1;
        $run_params{logfunc} = sub { print "QEMU: $_[0]\n" };
    }

    my %systemd_properties = (
        Slice => 'qemu.slice',
        KillMode => 'process',
        SendSIGKILL => 0,
        TimeoutStopUSec => ULONG_MAX, # infinity
    );

    if (PVE::CGroup::cgroup_mode() == 2) {
        $systemd_properties{CPUWeight} = $cpuunits;
    } else {
        $systemd_properties{CPUShares} = $cpuunits;
    }

    if (my $cpulimit = $conf->{cpulimit}) {
        $systemd_properties{CPUQuota} = int($cpulimit * 100);
    }
    $systemd_properties{timeout} = 10 if $statefile; # setting up the scope should be quick

    my $run_qemu = sub {
        PVE::Tools::run_fork sub {
            PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);

            my $tpmpid;
            if (my $tpm = $conf->{tpmstate0}) {
                # start the TPM emulator so QEMU can connect on start
                $tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom);
            }

            my $exitcode = run_command($cmd, %run_params);
            if ($exitcode) {
                if ($tpmpid) {
                    warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n";
                    kill 'TERM', $tpmpid;
                }
                die "QEMU exited with code $exitcode\n";
            }
        };
    };

    if ($conf->{hugepages}) {

        my $code = sub {
            my $hugepages_topology = PVE::QemuServer::Memory::hugepages_topology($conf);
            my $hugepages_host_topology = PVE::QemuServer::Memory::hugepages_host_topology();

            PVE::QemuServer::Memory::hugepages_mount();
            PVE::QemuServer::Memory::hugepages_allocate($hugepages_topology, $hugepages_host_topology);

            eval { $run_qemu->() };
            if (my $err = $@) {
                PVE::QemuServer::Memory::hugepages_reset($hugepages_host_topology)
                    if !$conf->{keephugepages};
                die $err;
            }

            PVE::QemuServer::Memory::hugepages_pre_deallocate($hugepages_topology)
                if !$conf->{keephugepages};
        };
        eval { PVE::QemuServer::Memory::hugepages_update_locked($code); };

    } else {
        eval { $run_qemu->() };
    }

    # NOTE: $@ still holds the result of whichever eval ran last in the branches above
    if (my $err = $@) {
        # deactivate volumes if start fails
        eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
        warn $@ if $@;
        eval { cleanup_pci_devices($vmid, $conf) };
        warn $@ if $@;

        die "start failed: $err";
    }

    # re-reserve all PCI IDs now that we can know the actual VM PID
    my $pid = PVE::QemuServer::Helpers::vm_running_locally($vmid);
    eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, undef, $pid) };
    warn $@ if $@;

    print "migration listens on $migrate_uri\n" if $migrate_uri;
    $res->{migrate_uri} = $migrate_uri;

    if ($statefile && $statefile ne 'tcp' && $statefile ne 'unix') {
        eval { mon_cmd($vmid, "cont"); };
        warn $@ if $@;
    }

    #start nbd server for storage migration
    if (my $nbd = $migrate_opts->{nbd}) {
        my $nbd_protocol_version = $migrate_opts->{nbd_proto_version} // 0;

        my $migrate_storage_uri;
        # nbd_protocol_version > 0 for unix socket support
        # $migration_type is only defaulted for statefile 'tcp' above, so it may be undefined
        if ($nbd_protocol_version > 0 && ($migration_type // '') eq 'secure') {
            my $socket_path = "/run/qemu-server/$vmid\_nbd.migrate";
            mon_cmd($vmid, "nbd-server-start", addr => { type => 'unix', data => { path => $socket_path } } );
            $migrate_storage_uri = "nbd:unix:$socket_path";
        } else {
            my $nodename = nodename();
            my $localip = $get_migration_ip->($nodename);
            my $pfamily = PVE::Tools::get_host_address_family($nodename);
            my $storage_migrate_port = PVE::Tools::next_migrate_port($pfamily);

            mon_cmd($vmid, "nbd-server-start", addr => {
                type => 'inet',
                data => {
                    host => "${localip}",
                    port => "${storage_migrate_port}",
                },
            });
            $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
            $migrate_storage_uri = "nbd:${localip}:${storage_migrate_port}";
        }

        $res->{migrate_storage_uri} = $migrate_storage_uri;

        foreach my $opt (sort keys %$nbd) {
            my $drivestr = $nbd->{$opt}->{drivestr};
            my $volid = $nbd->{$opt}->{volid};
            mon_cmd($vmid, "nbd-server-add", device => "drive-$opt", writable => JSON::true );
            my $nbd_uri = "$migrate_storage_uri:exportname=drive-$opt";
            print "storage migration listens on $nbd_uri volume:$drivestr\n";
            print "re-using replicated volume: $opt - $volid\n"
                if $nbd->{$opt}->{replicated};

            $res->{drives}->{$opt} = $nbd->{$opt};
            $res->{drives}->{$opt}->{nbd_uri} = $nbd_uri;
        }
    }

    if ($migratedfrom) {
        eval {
            set_migration_caps($vmid);
        };
        warn $@ if $@;

        if ($spice_port) {
            print "spice listens on port $spice_port\n";
            $res->{spice_port} = $spice_port;
            if ($migrate_opts->{spice_ticket}) {
                mon_cmd($vmid, "set_password", protocol => 'spice', password =>
                    $migrate_opts->{spice_ticket});
                mon_cmd($vmid, "expire_password", protocol => 'spice', time => "+30");
            }
        }

    } else {
        mon_cmd($vmid, "balloon", value => $conf->{balloon}*1024*1024)
            if !$statefile && $conf->{balloon};

        foreach my $opt (keys %$conf) {
            next if $opt !~ m/^net\d+$/;
            my $nicconf = parse_net($conf->{$opt});
            qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down};
        }
    }

    mon_cmd($vmid, 'qom-set',
        path => "machine/peripheral/balloon0",
        property => "guest-stats-polling-interval",
        value => 2) if (!defined($conf->{balloon}) || $conf->{balloon});

    if ($resume) {
        print "Resumed VM, removing state\n";
        if (my $vmstate = $conf->{vmstate}) {
            PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
            PVE::Storage::vdisk_free($storecfg, $vmstate);
        }
        delete $conf->@{qw(lock vmstate runningmachine runningcpu)};
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-start');

    return $res;
}
5867
# Return the QEMU command line for a guest as a single string.
#
# With $snapname the command line is built from that snapshot's config instead of the current
# one, including the machine/CPU overrides recorded in the snapshot. Dies if the snapshot does
# not exist.
sub vm_commandline {
    my ($storecfg, $vmid, $snapname) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    my ($forcemachine, $forcecpu);
    if ($snapname) {
        my $snapshot = $conf->{snapshots}->{$snapname}
            // die "snapshot '$snapname' does not exist\n";

        # check for machine or CPU overrides in snapshot
        ($forcemachine, $forcecpu) = $snapshot->@{qw(runningmachine runningcpu)};

        $snapshot->{digest} = $conf->{digest}; # keep file digest for API

        $conf = $snapshot;
    }

    my $defaults = load_defaults();

    my $cmd = config_to_command($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu);

    return PVE::Tools::cmd2string($cmd);
}
5893
# Send the 'system_reset' monitor command to a VM while holding the VM config lock.
#
# $vmid:     ID of the VM
# $skiplock: if true, the lock check on the loaded configuration is skipped
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $do_reset = sub {
        my $conf = PVE::QemuConfig->load_config($vmid);

        if (!$skiplock) {
            PVE::QemuConfig->check_lock($conf);
        }

        mon_cmd($vmid, "system_reset");
    };

    PVE::QemuConfig->lock_config($vmid, $do_reset);
}
5906
# Collect the volume IDs referenced by a VM configuration.
#
# Walks all volumes via foreach_volid() and keeps those that are neither absolute paths nor
# unparsable as a storage volume ID.
#
# $conf: VM configuration hash
#
# Returns: array ref of volume IDs
sub get_vm_volumes {
    my ($conf) = @_;

    my @volids;

    my $collect = sub {
        my ($volid, $attr) = @_;

        # absolute paths are skipped
        return if $volid =~ m|^/|;

        # skip everything that does not yield a storage ID
        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        push @volids, $volid;
    };

    foreach_volid($conf, $collect);

    return \@volids;
}
5924
# Remove the mediated devices (mdev) belonging to a VM's hostpciN entries and drop the VM's PCI
# reservation.
#
# $vmid: ID of the VM
# $conf: VM configuration hash; only keys of the form 'hostpciN' are looked at
sub cleanup_pci_devices {
    my ($vmid, $conf) = @_;

    for my $opt (keys %$conf) {
        my ($index) = $opt =~ m/^hostpci(\d+)$/;
        next if !defined($index);

        my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $index);
        my $device = parse_hostpci($conf->{$opt});
        next if !$device->{mdev};

        # NOTE: avoid PVE::SysFSTools::pci_cleanup_mdev_device as it requires PCI ID and we
        # don't want to break ABI just for this two liner
        my $sysfs_dir = "/sys/bus/mdev/devices/$uuid";
        if (-e $sysfs_dir) {
            PVE::SysFSTools::file_write("$sysfs_dir/remove", "1");
        }
    }

    PVE::QemuServer::PCI::remove_pci_reservation($vmid);
}
5942
# Clean up after a VM has stopped. All errors are caught and only reported via warn.
#
# $storecfg:              storage configuration
# $vmid:                  ID of the stopped VM
# $conf:                  VM configuration hash
# $keepActive:            if true, volumes are not deactivated and the TPM state is not unmapped
# $apply_pending_changes: if true, vmconfig_apply_pending() is called as the last step
sub vm_stop_cleanup {
    my ($storecfg, $vmid, $conf, $keepActive, $apply_pending_changes) = @_;

    eval {
        if (!$keepActive) {
            PVE::Storage::deactivate_volumes($storecfg, get_vm_volumes($conf));

            my $tpmdrive = $conf->{tpmstate0};
            if ($tpmdrive) {
                my $tpm = parse_drive("tpmstate0", $tpmdrive);
                my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
                PVE::Storage::unmap_volume($storecfg, $tpm->{file}) if $storeid;
            }
        }

        # remove the per-VM runtime files
        for my $ext (qw(mon qmp pid vnc qga)) {
            unlink "/var/run/qemu-server/${vmid}.$ext";
        }

        my $ivshmem_conf = $conf->{ivshmem};
        if ($ivshmem_conf) {
            my $ivshmem = parse_property_string($ivshmem_fmt, $ivshmem_conf);
            # Just delete it for now. VMs which already have this open are not affected, but
            # new VMs will get a separate one. If this becomes an issue we either add some sort
            # of ref-counting or just add a "don't delete on stop" flag to the ivshmem format.
            my $shm_name = $ivshmem->{name} // $vmid;
            unlink '/dev/shm/pve-shm-' . $shm_name;
        }

        cleanup_pci_devices($vmid, $conf);

        if ($apply_pending_changes) {
            vmconfig_apply_pending($vmid, $conf, $storecfg);
        }
    };
    warn $@ if $@; # avoid errors - just warn
}
5980
# call only in locked context
#
# Stop or shut down a running VM and clean up afterwards. Returns immediately if check_running()
# reports no PID for the VM.
#
# $storecfg:   storage configuration, passed on to vm_stop_cleanup()
# $vmid:       ID of the VM
# $skiplock:   if true, the config lock check is skipped
# $nocheck:    if true, the VM config is not loaded here (so no lock check, no 'pre-stop'
#              hookscript and no cleanup); also passed through to check_running()
# $timeout:    seconds to wait for the VM to exit; if undefined, a shutdown uses the 'down' value
#              of the 'startup' config option when present, with 60 as the final fallback
# $shutdown:   if true, ask the guest to shut down instead of sending 'quit'
# $force:      if true, fall back to SIGTERM (and to SIGKILL after 10 more seconds) instead of
#              dying when the VM does not stop
# $keepActive: passed on to vm_stop_cleanup()
sub _do_vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive) = @_;

    my $pid = check_running($vmid, $nocheck);
    return if !$pid;

    my $conf;
    if (!$nocheck) {
        $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->check_lock($conf) if !$skiplock;
        if (!defined($timeout) && $shutdown && $conf->{startup}) {
            my $startup = PVE::JSONSchema::pve_parse_startup_order($conf->{startup});
            $timeout = $startup->{down} if $startup->{down};
        }
        PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-stop');
    }

    # Poll once per second while the VM is still running, for at most $limit polls.
    # Returns true if the limit was reached.
    my $wait_timed_out = sub {
        my ($limit) = @_;

        my $waited = 0;
        while (($waited < $limit) && check_running($vmid, $nocheck)) {
            $waited++;
            sleep 1;
        }

        return $waited >= $limit;
    };

    # cleanup is only possible if the config was loaded above
    my $cleanup = sub {
        vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
    };

    eval {
        if (!$shutdown) {
            mon_cmd($vmid, "quit");
        } elsif (defined($conf) && get_qga_key($conf, 'enabled')) {
            mon_cmd($vmid, "guest-shutdown", timeout => $timeout);
        } else {
            mon_cmd($vmid, "system_powerdown");
        }
    };
    my $err = $@;

    if ($err) {
        if (!check_running($vmid, $nocheck)) {
            warn "Unexpected: VM shutdown command failed, but VM not running anymore..\n";
            return;
        }

        die "VM quit/powerdown failed\n" if !$force;

        warn "VM quit/powerdown failed - terminating now with SIGTERM\n";
        kill 15, $pid;
    } else {
        $timeout //= 60;

        if (!$wait_timed_out->($timeout)) {
            $cleanup->();
            return;
        }

        die "VM quit/powerdown failed - got timeout\n" if !$force;

        warn "VM still running - terminating now with SIGTERM\n";
        kill 15, $pid;
    }

    # wait again, this time for the SIGTERM to take effect
    if ($wait_timed_out->(10)) {
        warn "VM still running - terminating now with SIGKILL\n";
        kill 9, $pid;
        sleep 1;
    }

    $cleanup->();
}
6062
# Stop (or shut down) a VM.
#
# Note: use $nocheck to skip tests if VM configuration file exists.
# We need that when migrating VMs to other nodes (files already moved).
# Note: we set $keepActive in vzdump stop mode - volumes need to stay active.
#
# If $migratedfrom is set, the VM process is sent SIGTERM directly, the configuration is loaded
# with $migratedfrom as second argument and the cleanup runs without applying pending changes.
# Otherwise the work is done by _do_vm_stop() under the VM config lock.
#
# $force defaults to 1 when it is undefined and $shutdown is false.
sub vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive, $migratedfrom) = @_;

    $force //= 1 if !$shutdown;

    if (!$migratedfrom) {
        return PVE::QemuConfig->lock_config($vmid, sub {
            _do_vm_stop($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive);
        });
    }

    my $pid = check_running($vmid, $nocheck, $migratedfrom);
    kill 15, $pid if $pid;

    my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);
    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 0);

    return;
}
6083
# Reboot a running VM: create a reboot request and shut the VM down, all under the VM config lock.
# Nothing happens if the VM is not running.
#
# $vmid:    ID of the VM
# $timeout: shutdown timeout, passed on to _do_vm_stop()
#
# If anything fails, the reboot request is cleared again and the error is re-thrown.
sub vm_reboot {
    my ($vmid, $timeout) = @_;

    my $reboot = sub {
        eval {
            # only reboot if running, as qmeventd starts it again on a stop event
            if (check_running($vmid)) {
                create_reboot_request($vmid);

                _do_vm_stop(PVE::Storage::config(), $vmid, undef, undef, $timeout, 1);
            }
        };
        if (my $err = $@) {
            # avoid that the next normal shutdown will be confused for a reboot
            clear_reboot_request($vmid);
            die $err;
        }
    };

    PVE::QemuConfig->lock_config($vmid, $reboot);
}
6106
# Suspend a VM, either by pausing it ('stop' monitor command) or, with $includestate, by saving
# its state to a volume and quitting QEMU.
#
# note: if using the statestorage parameter, the caller has to check privileges
#
# $vmid:         ID of the VM
# $skiplock:     if true, the config lock check is skipped
# $includestate: if true, the VM state is saved to a state volume ("suspend to disk")
# $statestorage: storage for the state volume; if not set, find_vmstate_storage() picks one and,
#                unless the RPC environment type is 'cli', 'Datastore.AllocateSpace' is checked
#
# The config lock check is also skipped while a 'backup' lock is held, but suspending to disk
# dies in that case.
sub vm_suspend {
    my ($vmid, $skiplock, $includestate, $statestorage) = @_;

    my ($conf, $path, $storecfg, $vmstate);

    PVE::QemuConfig->lock_config($vmid, sub {
        $conf = PVE::QemuConfig->load_config($vmid);

        my $is_backing_up = PVE::QemuConfig->has_lock($conf, 'backup');
        if (!$skiplock && !$is_backing_up) {
            PVE::QemuConfig->check_lock($conf);
        }

        die "cannot suspend to disk during backup\n"
            if $is_backing_up && $includestate;

        if (!$includestate) {
            mon_cmd($vmid, "stop");
            return;
        }

        $conf->{lock} = 'suspending';
        my $date = strftime("%Y-%m-%d", localtime(time()));
        $storecfg = PVE::Storage::config();

        if (!$statestorage) {
            $statestorage = find_vmstate_storage($conf, $storecfg);
            # check permissions for the storage
            my $rpcenv = PVE::RPCEnvironment::get();
            if ($rpcenv->{type} ne 'cli') {
                my $authuser = $rpcenv->get_user();
                $rpcenv->check($authuser, "/storage/$statestorage", ['Datastore.AllocateSpace']);
            }
        }

        $vmstate = PVE::QemuConfig->__snapshot_save_vmstate(
            $vmid, $conf, "suspend-$date", $storecfg, $statestorage, 1);
        $path = PVE::Storage::path($storecfg, $vmstate);
        PVE::QemuConfig->write_config($vmid, $conf);
    });

    if ($includestate) {
        # save vm state; this happens outside of the config lock
        PVE::Storage::activate_volumes($storecfg, [$vmstate]);

        eval {
            set_migration_caps($vmid, 1);
            mon_cmd($vmid, "savevm-start", statefile => $path);

            # poll once per second until the save has completed or failed
            while (1) {
                my $state = mon_cmd($vmid, "query-savevm");
                my $status = $state->{status};

                die "savevm not active\n" if !$status;

                if ($status eq 'active') {
                    sleep(1);
                    next;
                }

                if ($status eq 'completed') {
                    print "State saved, quitting\n";
                    last;
                }

                die "query-savevm failed with error '$state->{error}'\n"
                    if $status eq 'failed' && $state->{error};

                die "query-savevm returned status '$state->{status}'\n";
            }
        };
        my $err = $@;

        PVE::QemuConfig->lock_config($vmid, sub {
            $conf = PVE::QemuConfig->load_config($vmid);

            if ($err) {
                # cleanup, but leave suspending lock, to indicate something went wrong
                eval {
                    mon_cmd($vmid, "savevm-end");
                    PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
                    PVE::Storage::vdisk_free($storecfg, $vmstate);
                    delete $conf->@{qw(vmstate runningmachine runningcpu)};
                    PVE::QemuConfig->write_config($vmid, $conf);
                };
                warn $@ if $@;
                die $err;
            }

            if (!PVE::QemuConfig->has_lock($conf, 'suspending')) {
                die "lock changed unexpectedly\n";
            }

            mon_cmd($vmid, "quit");
            $conf->{lock} = 'suspended';
            PVE::QemuConfig->write_config($vmid, $conf);
        });
    }
}
6201
# Resume a VM under the VM config lock, choosing the monitor command from the current
# 'query-status' result:
#   running   - nothing to do
#   suspended - 'system_wakeup'
#   shutdown  - 'system_reset' followed by 'cont'
#   otherwise - 'cont'
#
# $vmid:     ID of the VM
# $skiplock: if true, the config lock check is skipped (it is also skipped for a 'backup' lock)
# $nocheck:  if true, the VM config is not loaded and no lock check happens at all
sub vm_resume {
    my ($vmid, $skiplock, $nocheck) = @_;

    PVE::QemuConfig->lock_config($vmid, sub {
        my $res = mon_cmd($vmid, 'query-status');
        my $status = $res->{status};

        my ($resume_cmd, $reset) = ('cont', 0);

        if ($status) {
            return if $status eq 'running'; # job done, go home

            if ($status eq 'suspended') {
                $resume_cmd = 'system_wakeup';
            } elsif ($status eq 'shutdown') {
                $reset = 1;
            }
        }

        if (!$nocheck) {
            my $conf = PVE::QemuConfig->load_config($vmid);

            if (!$skiplock && !PVE::QemuConfig->has_lock($conf, 'backup')) {
                PVE::QemuConfig->check_lock($conf);
            }
        }

        # required if a VM shuts down during a backup and we get a resume
        # request before the backup finishes for example
        mon_cmd($vmid, "system_reset") if $reset;

        mon_cmd($vmid, $resume_cmd);
    });
}
6232
# Send a key to a VM through the human monitor 'sendkey' command, under the VM config lock.
#
# $vmid:     ID of the VM
# $skiplock: accepted, but not used by this function
# $key:      key specification, inserted verbatim into the 'sendkey' command line
#
# Dies with the monitor's output if the command printed anything.
sub vm_sendkey {
    my ($vmid, $skiplock, $key) = @_;

    my $send = sub {
        # The loaded config itself is not used.
        # NOTE(review): presumably kept so that a missing config fails early - confirm.
        my $conf = PVE::QemuConfig->load_config($vmid);

        # there is no qmp command, so we use the human monitor command
        my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "sendkey $key");
        die $output if $output ne '';
    };

    PVE::QemuConfig->lock_config($vmid, $send);
}
6245
# vzdump restore implementation
6247
# Return the name of the first member of a tar archive, as listed by `tar tf`.
#
# $archive: path of the archive; must be an existing plain file
#
# Dies if the file does not exist, if the tar process cannot be started or if the listing
# is empty.
sub tar_archive_read_firstfile {
    my $archive = shift;

    die "ERROR: file '$archive' does not exist\n" if ! -f $archive;

    # try to detect archive type first
    my $pid = open(my $fh, '-|', 'tar', 'tf', $archive)
        or die "unable to open file '$archive' - $!\n";

    # only the first line is needed, so tar is terminated right after reading it
    my $firstfile = <$fh>;
    kill 15, $pid;
    close $fh;

    die "ERROR: archive contains no data\n" if !$firstfile;
    chomp $firstfile;

    return $firstfile;
}
6265
# Remove the temporary volumes recorded in a restore stat file.
#
# Every line of $statfile is expected to end in 'vzdump:<name>:<volid>'. Absolute paths are
# unlinked, everything else is freed through PVE::Storage::vdisk_free(). Progress and errors are
# printed to STDERR; a failure for one volume does not stop the cleanup of the others.
#
# $storecfg: storage configuration, only needed for non-path volumes
# $statfile: path of the stat file; if it cannot be opened, nothing is cleaned up
sub tar_restore_cleanup {
    my ($storecfg, $statfile) = @_;

    print STDERR "starting cleanup\n";

    my $fd = IO::File->new($statfile, "r");
    return if !$fd;

    while (defined(my $line = <$fd>)) {
        if ($line =~ m/vzdump:[^\s:]*:(\S+)$/) {
            my $volid = $1;
            eval {
                if ($volid =~ m|^/|) {
                    # 'or' (not '||') so that the die applies to the result of unlink
                    unlink($volid) or die "unlink failed - $!\n";
                } else {
                    PVE::Storage::vdisk_free($storecfg, $volid);
                }
                print STDERR "temporary volume '$volid' sucessfuly removed\n";
            };
            print STDERR "unable to cleanup '$volid' - $@" if $@;
        } else {
            print STDERR "unable to parse line in statfile - $line";
        }
    }

    $fd->close();
}
6291
# Restore a VM from a backup archive file, dispatching on the archive format.
#
# $archive: path of the archive; '-' is handed to restore_vma_archive() without inspection
# $vmid:    ID of the VM to restore
# $user:    user ID, passed on to the format specific restore function
# $opts:    restore options; $opts->{format} overrides the format detected from the archive
#
# Returns whatever the selected restore function returns.
sub restore_file_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if ($archive eq '-') {
        return restore_vma_archive($archive, $vmid, $user, $opts);
    }

    my $info = PVE::Storage::archive_info($archive);

    # try to detect archive format
    my $format = $opts->{format} // $info->{format};

    return restore_tar_archive($archive, $vmid, $user, $opts) if $format eq 'tar';

    return restore_vma_archive($archive, $vmid, $user, $opts, $info->{compression});
}
6309
# Helper to remove disks that will not be used after restore.
#
# $storecfg:     storage configuration
# $vmid:         ID of the VM being restored; only volumes owned by it are touched
# $oldconf:      configuration of the VM that is being replaced
# $virtdev_hash: drives that will be restored, keyed by drive name
#
# Volumes of drives that get restored are freed. All other owned volumes are kept, but their
# snapshots are deleted. Snapshot VM state volumes are freed as well. Errors are only warned about.
my $restore_cleanup_oldconf = sub {
    my ($storecfg, $vmid, $oldconf, $virtdev_hash) = @_;

    # free a volume, but only warn on failure
    my $free_volume = sub {
        my ($volid) = @_;
        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        if (my $err = $@) {
            warn $err;
        }
    };

    my $kept_disks = {};

    PVE::QemuConfig->foreach_volume($oldconf, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return unless $path && $owner && ($owner == $vmid);

        # Note: only delete disk we want to restore
        # other volumes will become unused
        if (!$virtdev_hash->{$ds}) {
            $kept_disks->{$volid} = 1;
            return;
        }

        $free_volume->($volid);
    });

    # after the restore we have no snapshots anymore
    for my $snapname (keys $oldconf->{snapshots}->%*) {
        my $snap = $oldconf->{snapshots}->{$snapname};
        $free_volume->($snap->{vmstate}) if $snap->{vmstate};

        for my $volid (keys $kept_disks->%*) {
            eval { PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snapname); };
            warn $@ if $@;
        }
    }
};
6355
# Helper to parse vzdump backup device hints
#
# $rpcenv: Environment, used to check storage permissions
# $user: User ID, to check storage permissions
# $storecfg: Storage configuration
# $fh: the file handle for reading the configuration
# $devinfo: should contain device sizes for all backed-up devices
# $options: backup options (pool, default storage)
#
# Return: $virtdev_hash, updates $devinfo (add devname, virtdev, format, storeid)
my $parse_backup_hints = sub {
    my ($rpcenv, $user, $storecfg, $fh, $devinfo, $options) = @_;

    # assert that an image can be allocated on the given storage
    my $check_storage = sub {
        my ($storeid, $scfg) = @_;

        if (!$scfg->{content}->{images}) {
            die "Content type 'images' is not available on storage '$storeid'\n";
        }

        # root@pam is not subject to the permission check
        if ($user ne 'root@pam') {
            $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace']);
        }
    };

    my $virtdev_hash = {};

    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\#qmdump\#map:(\S+):(\S+):(\S*):(\S*):$/) {
            my ($virtdev, $devname, $storeid, $format) = ($1, $2, $3, $4);

            my $info = $devinfo->{$devname};
            die "archive does not contain data for drive '$virtdev'\n"
                if !$info;

            # an explicitly requested storage wins, 'local' is the final fallback
            $storeid = $options->{storage} if defined($options->{storage});
            $storeid ||= 'local';
            $format ||= 'raw';

            $info->@{qw(devname virtdev format storeid)} = ($devname, $virtdev, $format, $storeid);

            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $check_storage->($storeid, $scfg); # permission and content type check

            $virtdev_hash->{$virtdev} = $info;
        } elsif ($line =~ m/^((?:ide|sata|scsi)\d+):\s*(.*)\s*$/) {
            my $virtdev = $1;
            my $drive = parse_drive($virtdev, $2);

            # of the regular drive lines, only cloudinit drives are of interest here
            next if !drive_is_cloudinit($drive);

            my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
            $storeid = $options->{storage} if defined ($options->{storage});
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            my $format = qemu_img_format($scfg, $volname); # has 'raw' fallback

            $check_storage->($storeid, $scfg); # permission and content type check

            $virtdev_hash->{$virtdev} = {
                format => $format,
                storeid => $storeid,
                size => PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE,
                is_cloudinit => 1,
            };
        }
    }

    return $virtdev_hash;
};
6423
# Helper to allocate and activate all volumes required for a restore
#
# $storecfg: Storage configuration
# $virtdev_hash: as returned by parse_backup_hints()
# $vmid: ID of the VM the volumes are allocated for
#
# Each entry of $virtdev_hash gets its 'volid' set, and its 'format' is replaced by the storage's
# default format if the requested one is not supported there.
#
# Returns: { $virtdev => $volid }
my $restore_allocate_devices = sub {
    my ($storecfg, $virtdev_hash, $vmid) = @_;

    my $map = {};
    foreach my $virtdev (sort keys %$virtdev_hash) {
        my $d = $virtdev_hash->{$virtdev};
        my $alloc_size = int(($d->{size} + 1024 - 1)/1024); # size / 1024, rounded up
        my $storeid = $d->{storeid};
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        # test if requested format is supported
        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        my $supported = grep { $_ eq $d->{format} } @$validFormats;
        $d->{format} = $defFormat if !$supported;

        # cloudinit disks get a fixed name, all other names are left to vdisk_alloc()
        my $name;
        if ($d->{is_cloudinit}) {
            $name = "vm-$vmid-cloudinit";
            # storages with a 'path' get the format appended as file extension
            $name .= ".$d->{format}" if $scfg->{path};
        }

        my $volid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $vmid, $d->{format}, $name, $alloc_size);

        print STDERR "new volume ID is '$volid'\n";
        $d->{volid} = $volid;

        PVE::Storage::activate_volumes($storecfg, [$volid]);

        $map->{$virtdev} = $volid;
    }

    return $map;
};
6467
# Translate one line of a backed-up VM configuration into the text that goes into the restored
# configuration.
#
# $cookie: hash ref with the running 'netcount' used to number converted 'vlanN' entries
# $map:    { $virtdev => $volid } of the newly allocated volumes
# $line:   the configuration line, including its line ending
# $unique: if true, MAC addresses and the SMBIOS UUID are regenerated
#
# Returns the replacement text, which is the empty string for lines that are dropped.
sub restore_update_config_line {
    my ($cookie, $map, $line, $unique) = @_;

    # backup hints, locks, unused disks and snapshot parents are never carried over
    for my $dropped (qr/^\#qmdump\#/, qr/^\#vzdump\#/, qr/^lock:/, qr/^unused\d+:/, qr/^parent:/) {
        return '' if $line =~ $dropped;
    }

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');

    if ($line =~ m/^(vlan(\d+)):\s*(\S+)\s*$/) {
        # try to convert old 1.X settings
        my ($ind, $ethcfg) = ($2, $3);

        my $converted = '';
        foreach my $devconfig (PVE::Tools::split_list($ethcfg)) {
            my ($model, $macaddr) = split(/\=/, $devconfig);
            if (!$macaddr || $unique) {
                $macaddr = PVE::Tools::random_ether_addr($dc->{mac_prefix});
            }

            my $netstr = print_net({
                model => $model,
                bridge => "vmbr$ind",
                macaddr => $macaddr,
            });

            $converted .= "net$cookie->{netcount}: $netstr\n";
            $cookie->{netcount}++;
        }
        return $converted;
    }

    if ($unique && $line =~ m/^(net\d+):\s*(\S+)\s*$/) {
        my ($id, $netstr) = ($1, $2);

        # only an already configured MAC address gets replaced
        my $net = parse_net($netstr);
        $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if $net->{macaddr};

        return "$id: " . print_net($net) . "\n";
    }

    if ($line =~ m/^((ide|scsi|virtio|sata|efidisk|tpmstate)\d+):\s*(\S+)\s*$/) {
        my ($virtdev, $value) = ($1, $3);
        my $di = parse_drive($virtdev, $value);

        # drives excluded from the backup are kept as a comment
        return "#$line" if defined($di->{backup}) && !$di->{backup};

        # drives without a newly allocated volume stay as they are
        return $line if !$map->{$virtdev};

        delete $di->{format}; # format can change on restore
        $di->{file} = $map->{$virtdev};

        return "$virtdev: " . print_drive($di) . "\n";
    }

    if ($line =~ m/^vmgenid: (.*)/) {
        my $vmgenid = $1;
        # always generate a new vmgenid if there was a valid one setup
        $vmgenid = generate_uuid() if $vmgenid ne '0';

        return "vmgenid: $vmgenid\n";
    }

    if ($unique && $line =~ m/^(smbios1: )(.*)/) {
        my ($key_prefix, $value) = ($1, $2);

        my ($uuid, $uuid_str);
        UUID::generate($uuid);
        UUID::unparse($uuid, $uuid_str);

        my $smbios1 = parse_smbios1($value);
        $smbios1->{uuid} = $uuid_str;

        return $key_prefix . print_smbios1($smbios1) . "\n";
    }

    return $line;
}
6536
# Deactivate all volumes that were allocated for a restore. Entries of $virtdev_hash without a
# 'volid' are ignored. An error is printed to STDERR instead of being propagated.
#
# $storecfg:     storage configuration
# $virtdev_hash: drive infos, keyed by drive name
my $restore_deactivate_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    my $vollist = [ grep { $_ } map { $_->{volid} } values $virtdev_hash->%* ];

    eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
    print STDERR $@ if $@;
};
6548
# Free all volumes that were allocated for a restore. Entries of $virtdev_hash without a 'volid'
# are ignored. The outcome for every volume is printed to STDERR and a failure does not stop the
# cleanup of the remaining volumes.
#
# $storecfg:     storage configuration
# $virtdev_hash: drive infos, keyed by drive name
my $restore_destroy_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    my @volids = grep { $_ } map { $_->{volid} } values $virtdev_hash->%*;

    for my $volid (@volids) {
        eval {
            PVE::Storage::vdisk_free($storecfg, $volid);
            print STDERR "temporary volume '$volid' sucessfuly removed\n";
        };
        print STDERR "unable to cleanup '$volid' - $@" if $@;
    }
};
6561
# Parse the raw configuration from a backup and apply overrides on top of it.
#
# $filename:        file name handed to parse_vm_config()
# $backup_conf_raw: raw configuration text
# $override_conf:   hash ref; each of its keys replaces the same top-level key of the parsed config
#
# Returns the merged configuration hash.
my $restore_merge_config = sub {
    my ($filename, $backup_conf_raw, $override_conf) = @_;

    my $merged = parse_vm_config($filename, $backup_conf_raw);

    foreach my $opt (keys $override_conf->%*) {
        $merged->{$opt} = $override_conf->{$opt};
    }

    return $merged;
};
6572
# List the 'images' volumes known to the storage layer, optionally restricted to one VM.
#
# $cfg:  storage configuration
# $vmid: VM ID passed to PVE::Storage::vdisk_list() as filter (may be undef)
#
# Returns: { $volid => $item }, where $item is the entry from vdisk_list() with an added 'path'.
# Entries without a volid or with a false size are skipped.
sub scan_volids {
    my ($cfg, $vmid) = @_;

    my $info = PVE::Storage::vdisk_list($cfg, undef, $vmid, undef, 'images');

    my $volid_hash = {};
    for my $images (values %$info) {
        for my $item (@$images) {
            my $volid = $item->{volid};
            next unless $volid && $item->{size};

            $item->{path} = PVE::Storage::path($cfg, $volid);
            $volid_hash->{$volid} = $item;
        }
    }

    return $volid_hash;
}
6589
# Bring the disk entries of a VM configuration in line with the volumes found on the storages.
#
# Three steps: update the size of configured drives, drop 'unusedN' entries whose volume is in
# use, and add volumes nobody references as new unused entries. Every change is printed.
#
# $vmid:       ID of the VM
# $conf:       VM configuration hash, modified in place
# $volid_hash: { $volid => $info } as returned by scan_volids()
#
# Returns 1 if $conf was changed, undef otherwise.
sub update_disk_config {
    my ($vmid, $conf, $volid_hash) = @_;

    my $changes;
    my $prefix = "VM $vmid";

    # used and unused disks
    my $referenced = {};

    # Note: it is allowed to define multiple storages with same path (alias), so
    # we need to check both 'volid' and real 'path' (two different volid can point
    # to the same path).
    my $referencedpath = {};

    # update size info
    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $volume = $volid_hash->{$volid};
        my $path = $volume ? $volume->{path} : undef;

        # mark volid as "in-use" for next step
        $referenced->{$volid} = 1;
        $referencedpath->{$path} = 1 if $path;

        return if drive_is_cdrom($drive) || !$volume;

        my ($updated, $msg) = PVE::QemuServer::Drive::update_disksize($drive, $volume->{size});
        return if !defined($updated);

        $changes = 1;
        $conf->{$opt} = print_drive($updated);
        print "$prefix ($opt): $msg\n";
    });

    # remove 'unusedX' entry if volume is used
    PVE::QemuConfig->foreach_unused_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $known = $volid_hash->{$volid};
        my $path = $known ? $known->{path} : undef;

        if ($referenced->{$volid} || ($path && $referencedpath->{$path})) {
            print "$prefix remove entry '$opt', its volume '$volid' is in use\n";
            $changes = 1;
            delete $conf->{$opt};
        }

        $referenced->{$volid} = 1;
        $referencedpath->{$path} = 1 if $path;
    });

    # add everything that is still unreferenced; VM state volumes are left alone
    foreach my $volid (sort keys %$volid_hash) {
        next if $volid =~ m/vm-$vmid-state-/;
        next if $referenced->{$volid};

        my $path = $volid_hash->{$volid}->{path};
        next if !$path; # just to be sure
        next if $referencedpath->{$path};

        $changes = 1;
        my $key = PVE::QemuConfig->add_unused_volume($conf, $volid);
        print "$prefix add unreferenced volume '$volid' as '$key' to config\n";
        $referencedpath->{$path} = 1; # avoid to add more than once (aliases)
    }

    return $changes;
}
6663
# Rescan the storages and update the disk entries of one VM, or of all VMs.
#
# $vmid:   ID of the VM to update; if undefined, every VM returned by config_list() is processed
# $nolock: if true, the configuration is updated without taking the VM config lock
# $dryrun: if true, changes are reported but the configuration is not written
sub rescan {
    my ($vmid, $nolock, $dryrun) = @_;

    my $cfg = PVE::Storage::config();

    print "rescan volumes...\n";
    my $volid_hash = scan_volids($cfg, $vmid);

    my $updatefn = sub {
        my ($id) = @_;

        my $conf = PVE::QemuConfig->load_config($id);

        PVE::QemuConfig->check_lock($conf);

        # only look at the volumes which belong to this VM
        my $vm_volids = {};
        for my $volid (keys %$volid_hash) {
            my $info = $volid_hash->{$volid};
            next unless $info->{vmid} && $info->{vmid} == $id;
            $vm_volids->{$volid} = $info;
        }

        my $changes = update_disk_config($id, $conf, $vm_volids);

        PVE::QemuConfig->write_config($id, $conf) if $changes && !$dryrun;
    };

    # run the update for one VM, with or without the config lock
    my $run_update = sub {
        my ($id) = @_;
        return $nolock
            ? $updatefn->($id)
            : PVE::QemuConfig->lock_config($id, $updatefn, $id);
    };

    if (defined($vmid)) {
        $run_update->($vmid);
    } else {
        my $vmlist = config_list();
        for my $id (keys %$vmlist) {
            $run_update->($id);
        }
    }
}
6707
# Restore a VM from a snapshot on a Proxmox Backup Server storage.
#
# $archive: volume ID of the backup; must parse as vtype 'backup' with format 'pbs-vm'
# $vmid:    ID of the VM to create or overwrite
# $user:    user ID, used for the storage permission checks
# $options: hash ref; 'live', 'unique', 'override_conf' and 'pool' are read here, and the whole
#           hash is also handed to the backup hint parser
#
# The downloaded config, index and firewall files live in a temporary directory that is removed
# again in any case. On error, all volumes allocated so far are deactivated and destroyed before
# the error is re-thrown. With $options->{live}, the VM is started through pbs_live_restore() and
# the 'create' lock is held until that call returns.
sub restore_proxmox_backup_archive {
    my ($archive, $vmid, $user, $options) = @_;

    my $storecfg = PVE::Storage::config();

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($archive);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

    my $fingerprint = $scfg->{fingerprint};
    my $keyfile = PVE::Storage::PBSPlugin::pbs_encryption_key_file_name($storecfg, $storeid);

    my $repo = PVE::PBSClient::get_repository($scfg);
    my $namespace = $scfg->{namespace};

    # This is only used for `pbs-restore` and the QEMU PBS driver (live-restore)
    my $password = PVE::Storage::PBSPlugin::pbs_get_password($scfg, $storeid);
    local $ENV{PBS_PASSWORD} = $password;
    local $ENV{PBS_FINGERPRINT} = $fingerprint if defined($fingerprint);

    my ($vtype, $pbs_backup_name, undef, undef, undef, undef, $format) =
        PVE::Storage::parse_volname($storecfg, $archive);

    die "got unexpected vtype '$vtype'\n" if $vtype ne 'backup';

    die "got unexpected backup format '$format'\n" if $format ne 'pbs-vm';

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;
    mkpath $tmpdir;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    # Note: $oldconf is undef if VM does not exists
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my $rpcenv = PVE::RPCEnvironment::get();
    my $devinfo = {}; # info about drives included in backup
    my $virtdev_hash = {}; # info about allocated drives

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        my $cfgfn = "$tmpdir/qemu-server.conf";
        my $firewall_config_fn = "$tmpdir/fw.conf";
        my $index_fn = "$tmpdir/index.json";

        my $cmd = "restore";

        my $param = [$pbs_backup_name, "index.json", $index_fn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);
        my $index = PVE::Tools::file_get_contents($index_fn);
        $index = decode_json($index);

        # collect the sizes of all drive images listed in the index
        foreach my $info (@{$index->{files}}) {
            # the dots are literal parts of the file name, so they have to be escaped
            if ($info->{filename} =~ m/^(drive-\S+)\.img\.fidx$/) {
                my $devname = $1;
                if ($info->{size} =~ m/^(\d+)$/) { # untaint size
                    $devinfo->{$devname}->{size} = $1;
                } else {
                    die "unable to parse file size in 'index.json' - got '$info->{size}'\n";
                }
            }
        }

        my $is_qemu_server_backup = scalar(
            grep { $_->{filename} eq 'qemu-server.conf.blob' } @{$index->{files}}
        );
        if (!$is_qemu_server_backup) {
            die "backup does not look like a qemu-server backup (missing 'qemu-server.conf' file)\n";
        }
        my $has_firewall_config = scalar(grep { $_->{filename} eq 'fw.conf.blob' } @{$index->{files}});

        $param = [$pbs_backup_name, "qemu-server.conf", $cfgfn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

        if ($has_firewall_config) {
            $param = [$pbs_backup_name, "fw.conf", $firewall_config_fn];
            PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($firewall_config_fn, "${pve_firewall_dir}/$vmid.fw");
        }

        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $storecfg, $fh, $devinfo, $options);

        # fixme: rate limit?

        # create empty/temp config
        PVE::Tools::file_set_contents($conffile, "memory: 128\nlock: create");

        $restore_cleanup_oldconf->($storecfg, $vmid, $oldconf, $virtdev_hash) if $oldconf;

        # allocate volumes
        my $map = $restore_allocate_devices->($storecfg, $virtdev_hash, $vmid);

        # the namespace argument is the same for every drive
        my @ns_arg = defined($namespace) ? ('--ns', $namespace) : ();

        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            # this fails if storage is unavailable
            my $volid = $d->{volid};
            my $path = PVE::Storage::path($storecfg, $volid);

            # for live-restore we only want to preload the efidisk and TPM state
            next if $options->{live} && $virtdev ne 'efidisk0' && $virtdev ne 'tpmstate0';

            my $pbs_restore_cmd = [
                '/usr/bin/pbs-restore',
                '--repository', $repo,
                @ns_arg,
                $pbs_backup_name,
                "$d->{devname}.img.fidx",
                $path,
                '--verbose',
            ];

            push @$pbs_restore_cmd, '--format', $d->{format} if $d->{format};
            push @$pbs_restore_cmd, '--keyfile', $keyfile if -e $keyfile;

            if (PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $volid)) {
                push @$pbs_restore_cmd, '--skip-zero';
            }

            my $dbg_cmdstring = PVE::Tools::cmd2string($pbs_restore_cmd);
            print "restore proxmox backup image: $dbg_cmdstring\n";
            run_command($pbs_restore_cmd);
        }

        # second pass over the config file, this time to build the new configuration
        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $options->{unique},
            );
        }

        $fh->close();
    };
    my $err = $@;

    # for a successful live-restore the volumes have to stay active
    if ($err || !$options->{live}) {
        $restore_deactivate_volumes->($storecfg, $virtdev_hash);
    }

    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($storecfg, $virtdev_hash);
        die $err;
    }

    if ($options->{live}) {
        # keep lock during live-restore
        $new_conf_raw .= "\nlock: create";
    }

    my $new_conf = $restore_merge_config->($conffile, $new_conf_raw, $options->{override_conf});
    PVE::QemuConfig->write_config($vmid, $new_conf);

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $options->{pool}) if $options->{pool};

    if ($options->{live}) {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "got signal ($!) - abort\n"; };

        my $conf = PVE::QemuConfig->load_config($vmid);
        die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf);

        # these special drives are already restored before start
        delete $devinfo->{'drive-efidisk0'};
        delete $devinfo->{'drive-tpmstate0-backup'};

        my $pbs_opts = {
            repo => $repo,
            keyfile => $keyfile,
            snapshot => $pbs_backup_name,
            namespace => $namespace,
        };
        pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $pbs_opts);

        PVE::QemuConfig->remove_lock($vmid, "create");
    }
}
6923
# Start a VM whose disks are backed by a Proxmox Backup Server snapshot and
# stream the backup data into the target volumes while the guest is running.
#
# Parameters:
#   $vmid           - ID of the VM to start
#   $conf           - VM configuration, used to look up the target drives
#   $storecfg       - storage configuration, handed to vm_start_nolock/_do_vm_stop
#   $restored_disks - hash keyed by backup device name ('drive-<config key>')
#   $opts           - hash with 'repo' and 'snapshot', optionally 'keyfile' and
#                     'namespace'
#
# If anything fails after the setup phase, the error is printed as a warning,
# the VM is stopped via _do_vm_stop() and the sub dies with "live-restore failed".
sub pbs_live_restore {
    my ($vmid, $conf, $storecfg, $restored_disks, $opts) = @_;

    print "starting VM for live-restore\n";
    print "repository: '$opts->{repo}', snapshot: '$opts->{snapshot}'\n";

    # the key file is optional - do not run a file test on an undefined value
    my $keyfile = $opts->{keyfile};
    my $has_keyfile = defined($keyfile) && -e $keyfile;

    my $pbs_backing = {};
    for my $ds (keys %$restored_disks) {
        # an unexpected device name must not silently continue with an
        # undefined or stale capture variable
        my ($confname) = $ds =~ m/^drive-(.*)$/
            or die "unexpected device name '$ds' in backup\n";

        $pbs_backing->{$confname} = {
            repository => $opts->{repo},
            snapshot => $opts->{snapshot},
            archive => "$ds.img.fidx",
        };
        $pbs_backing->{$confname}->{keyfile} = $keyfile if $has_keyfile;
        $pbs_backing->{$confname}->{namespace} = $opts->{namespace} if defined($opts->{namespace});

        my $drive = parse_drive($confname, $conf->{$confname});
        print "restoring '$ds' to '$drive->{file}'\n";
    }

    eval {
        # make sure HA doesn't interrupt our restore by stopping the VM
        if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
            run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
        }

        # start VM with backing chain pointing to PBS backup, environment vars for PBS driver
        # in QEMU (PBS_PASSWORD and PBS_FINGERPRINT) are already set by our caller
        vm_start_nolock($storecfg, $vmid, $conf, {paused => 1, 'pbs-backing' => $pbs_backing}, {});

        my $qmeventd_fd = register_qmeventd_handle($vmid);

        # begin streaming, i.e. data copy from PBS to target disk for every vol,
        # this will effectively collapse the backing image chain consisting of
        # [target <- alloc-track -> PBS snapshot] to just [target] (alloc-track
        # removes itself once all backing images vanish with 'auto-remove=on')
        my $jobs = {};
        for my $ds (sort keys %$restored_disks) {
            my $job_id = "restore-$ds";
            mon_cmd($vmid, 'block-stream',
                'job-id' => $job_id,
                device => "$ds",
            );
            $jobs->{$job_id} = {};
        }

        # the VM was started paused, let the guest run while the data is streamed
        mon_cmd($vmid, 'cont');
        qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');

        print "restore-drive jobs finished successfully, removing all tracking block devices"
            ." to disconnect from Proxmox Backup Server\n";

        for my $ds (sort keys %$restored_disks) {
            mon_cmd($vmid, 'blockdev-del', 'node-name' => "$ds-pbs");
        }

        close($qmeventd_fd);
    };

    my $err = $@;

    if ($err) {
        warn "An error occurred during live-restore: $err\n";
        _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
        die "live-restore failed\n";
    }
}
6994
# Restore VM $vmid from the VMA archive $archive ('-' means read from STDIN).
#
# A command pipeline is built from an optional 'cstream' rate limiter, an
# optional decompressor and 'vma extract'. Once vma reports that the embedded
# config is available, the target volumes are allocated and the device
# mapping is written to vma through a FIFO. After a successful extraction the
# rewritten config is merged with $opts->{override_conf} and written out.
#
# Parameters:
#   $archive - path of the archive, or '-' for STDIN
#   $vmid    - ID of the VM to restore to
#   $user    - passed on to $parse_backup_hints
#   $opts    - hash; the keys read here are 'bwlimit', 'unique',
#              'override_conf' and 'pool' (the whole hash is also passed on to
#              $parse_backup_hints)
#   $comp    - compression identifier for PVE::Storage::decompressor_info(),
#              no decompressor is added if it is false
#
# On error the volumes recorded in $virtdev_hash are destroyed and the error
# is re-thrown.
sub restore_vma_archive {
    my ($archive, $vmid, $user, $opts, $comp) = @_;

    my $readfrom = $archive;

    my $cfg = PVE::Storage::config();
    my $commands = [];
    my $bwlimit = $opts->{bwlimit};

    my $dbg_cmdstring = '';
    # append a stage to the pipeline; every later stage reads from stdin ('-')
    my $add_pipe = sub {
        my ($cmd) = @_;
        push @$commands, $cmd;
        $dbg_cmdstring .= ' | ' if length($dbg_cmdstring);
        $dbg_cmdstring .= PVE::Tools::cmd2string($cmd);
        $readfrom = '-';
    };

    my $input = undef;
    if ($archive eq '-') {
        $input = '<&STDIN';
    } else {
        # If we use a backup from a PVE defined storage we also consider that
        # storage's rate limit:
        my (undef, $volid) = PVE::Storage::path_to_volume_id($cfg, $archive);
        if (defined($volid)) {
            my ($sid, undef) = PVE::Storage::parse_volume_id($volid);
            my $readlimit = PVE::Storage::get_bandwidth_limit('restore', [$sid], $bwlimit);
            if ($readlimit) {
                print STDERR "applying read rate limit: $readlimit\n";
                my $cstream = ['cstream', '-t', $readlimit*1024, '--', $readfrom];
                $add_pipe->($cstream);
            }
        }
    }

    if ($comp) {
        my $info = PVE::Storage::decompressor_info('vma', $comp);
        my $cmd = $info->{decompressor};
        push @$cmd, $readfrom;
        $add_pipe->($cmd);
    }

    # per-process temporary directory; remove any left-over with the same name
    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { warn "got interrupt - ignored\n"; };

    # FIFO passed to 'vma extract -r'; the device map is written into it below
    my $mapfifo = "/var/tmp/vzdumptmp$$.fifo";
    POSIX::mkfifo($mapfifo, 0600);
    my $fifofh;
    # passed to run_command as 'afterfork' callback, opens the FIFO for writing
    my $openfifo = sub { open($fifofh, '>', $mapfifo) or die $! };

    $add_pipe->(['vma', 'extract', '-v', '-r', $mapfifo, $readfrom, $tmpdir]);

    my $oldtimeout;
    # seconds to wait for the config from vma (see the CTIME handling below)
    my $timeout = 5;

    my $devinfo = {}; # info about drives included in backup
    my $virtdev_hash = {}; # info about allocated drives

    my $rpcenv = PVE::RPCEnvironment::get();

    my $conffile = PVE::QemuConfig->config_file($vmid);

    # Note: $oldconf is undef if VM does not exist
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    # cache of restore bandwidth limits in bytes/s, keyed by storage ID
    my %storage_limits;

    # Called once the config has been extracted: allocates the target volumes,
    # writes one mapping line per device to $fifofh and collects the rewritten
    # config lines in $new_conf_raw.
    my $print_devmap = sub {
        my $cfgfn = "$tmpdir/qemu-server.conf";

        # we can read the config - that is already extracted
        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        # install the firewall config if the archive contains one
        my $fwcfgfn = "$tmpdir/qemu-server.fw";
        if (-f $fwcfgfn) {
            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($fwcfgfn, "${pve_firewall_dir}/$vmid.fw");
        }

        $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $cfg, $fh, $devinfo, $opts);

        # look up the bandwidth limit once per target storage
        foreach my $info (values %{$virtdev_hash}) {
            my $storeid = $info->{storeid};
            next if defined($storage_limits{$storeid});

            my $limit = PVE::Storage::get_bandwidth_limit('restore', [$storeid], $bwlimit) // 0;
            print STDERR "rate limit for storage $storeid: $limit KiB/s\n" if $limit;
            $storage_limits{$storeid} = $limit * 1024;
        }

        # every device announced by vma needs a matching 'virtdev' entry
        foreach my $devname (keys %$devinfo) {
            die "found no device mapping information for device '$devname'\n"
                if !$devinfo->{$devname}->{virtdev};
        }

        # create empty/temp config
        if ($oldconf) {
            PVE::Tools::file_set_contents($conffile, "memory: 128\n");
            $restore_cleanup_oldconf->($cfg, $vmid, $oldconf, $virtdev_hash);
        }

        # allocate volumes
        my $map = $restore_allocate_devices->($cfg, $virtdev_hash, $vmid);

        # print restore information to $fifofh
        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            my $storeid = $d->{storeid};
            my $volid = $d->{volid};

            my $map_opts = '';
            if (my $limit = $storage_limits{$storeid}) {
                $map_opts .= "throttling.bps=$limit:throttling.group=$storeid:";
            }

            # zeros only have to be written if the storage lacks 'sparseinit'
            my $write_zeros = 1;
            if (PVE::Storage::volume_has_feature($cfg, 'sparseinit', $volid)) {
                $write_zeros = 0;
            }

            my $path = PVE::Storage::path($cfg, $volid);

            print $fifofh "${map_opts}format=$d->{format}:${write_zeros}:$d->{devname}=$path\n";

            print "map '$d->{devname}' to '$path' (write zeros = ${write_zeros})\n";
        }

        # $fh was already read by $parse_backup_hints, start over for the config rewrite
        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $fh->close();
    };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };
        local $SIG{ALRM} = sub { die "got timeout\n"; };

        # remember a previously pending alarm so that it can be re-armed later
        $oldtimeout = alarm($timeout);

        # output handler for the pipeline, called with one line at a time
        my $parser = sub {
            my $line = shift;

            print "$line\n";

            if ($line =~ m/^DEV:\sdev_id=(\d+)\ssize:\s(\d+)\sdevname:\s(\S+)$/) {
                my ($dev_id, $size, $devname) = ($1, $2, $3);
                $devinfo->{$devname} = { size => $size, dev_id => $dev_id };
            } elsif ($line =~ m/^CTIME: /) {
                # we correctly received the vma config, so we can disable
                # the timeout now for disk allocation (set to 10 minutes, so
                # that we always timeout if something goes wrong)
                alarm(600);
                &$print_devmap();
                print $fifofh "done\n";
                # re-arm the caller's alarm (if any) and mark it as handled
                my $tmp = $oldtimeout || 0;
                $oldtimeout = undef;
                alarm($tmp);
                close($fifofh);
                $fifofh = undef;
            }
        };

        print "restore vma archive: $dbg_cmdstring\n";
        run_command($commands, input => $input, outfunc => $parser, afterfork => $openfifo);
    };
    my $err = $@;

    # only still set if the CTIME branch above was never reached
    alarm($oldtimeout) if $oldtimeout;

    $restore_deactivate_volumes->($cfg, $virtdev_hash);

    close($fifofh) if $fifofh;
    unlink $mapfifo;
    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($cfg, $virtdev_hash);
        die $err;
    }

    my $new_conf = $restore_merge_config->($conffile, $new_conf_raw, $opts->{override_conf});
    PVE::QemuConfig->write_config($vmid, $new_conf);

    # a failing rescan only produces a warning
    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $opts->{pool}) if $opts->{pool};
}
7210
# Restore VM $vmid from a tar based vzdump archive ('-' means read from STDIN).
#
# The archive is unpacked with tar, which pipes every member to
# /usr/lib/qemu-server/qmextract. Afterwards the volume mapping recorded in
# the 'qmrestore.stat' file and the extracted 'qemu-server.conf' are used to
# write the new VM config.
#
# Parameters:
#   $archive - path of the archive, or '-' for STDIN
#   $vmid    - ID of the VM to restore to
#   $user    - exported to the extraction command as VZDUMP_USER
#   $opts    - hash; the keys read here are 'override_conf' (must be empty),
#              'storage', 'pool', 'prealloc', 'info' and 'unique'
#
# Dies if the extraction or the config rewrite fails. The temporary directory
# is removed in that case too.
sub restore_tar_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if (scalar(keys $opts->{override_conf}->%*) > 0) {
        my $keystring = join(' ', keys $opts->{override_conf}->%*);
        die "cannot pass along options ($keystring) when restoring from tar archive\n";
    }

    if ($archive ne '-') {
        my $firstfile = tar_archive_read_firstfile($archive);
        die "ERROR: file '$archive' does not look like a QemuServer vzdump backup\n"
            if $firstfile ne 'qemu-server.conf';
    }

    my $storecfg = PVE::Storage::config();

    # avoid zombie disks when restoring over an existing VM -> cleanup first
    # pass keep_empty_config=1 to keep the config (thus VMID) reserved for us
    # skiplock=1 because qmrestore has set the 'create' lock itself already
    my $vmcfgfn = PVE::QemuConfig->config_file($vmid);
    destroy_vm($storecfg, $vmid, 1, { lock => 'restore' }) if -f $vmcfgfn;

    my $tocmd = "/usr/lib/qemu-server/qmextract";

    $tocmd .= " --storage " . PVE::Tools::shellquote($opts->{storage}) if $opts->{storage};
    $tocmd .= " --pool " . PVE::Tools::shellquote($opts->{pool}) if $opts->{pool};
    $tocmd .= ' --prealloc' if $opts->{prealloc};
    $tocmd .= ' --info' if $opts->{info};

    # tar option "xf" does not autodetect compression when read from STDIN,
    # so we pipe to zcat
    my $cmd = "zcat -f|tar xf " . PVE::Tools::shellquote($archive) . " " .
        PVE::Tools::shellquote("--to-command=$tocmd");

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    mkpath $tmpdir;

    # handed to the extraction command through the environment
    local $ENV{VZDUMP_TMPDIR} = $tmpdir;
    local $ENV{VZDUMP_VMID} = $vmid;
    local $ENV{VZDUMP_USER} = $user;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    my $new_conf_raw = '';

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        if ($archive eq '-') {
            print "extracting archive from STDIN\n";
            run_command($cmd, input => "<&STDIN");
        } else {
            print "extracting archive '$archive'\n";
            run_command($cmd);
        }

        # note: this only leaves the eval block
        return if $opts->{info};

        # read new mapping
        my $map = {};
        my $statfile = "$tmpdir/qmrestore.stat";
        if (my $fd = IO::File->new($statfile, "r")) {
            while (defined (my $line = <$fd>)) {
                if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                    $map->{$1} = $2 if $1;
                } else {
                    print STDERR "unable to parse line in statfile - $line\n";
                }
            }
            $fd->close();
        }

        my $confsrc = "$tmpdir/qemu-server.conf";

        my $srcfd = IO::File->new($confsrc, "r") || die "unable to open file '$confsrc'\n";

        my $cookie = { netcount => 0 };
        while (defined (my $line = <$srcfd>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $srcfd->close();
    };
    if (my $err = $@) {
        # the stat file lives inside $tmpdir, so it has to be evaluated
        # before the directory is removed
        tar_restore_cleanup($storecfg, "$tmpdir/qmrestore.stat") if !$opts->{info};
        # do not leave the temporary directory behind on failure
        rmtree $tmpdir;
        die $err;
    }

    rmtree $tmpdir;

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    # a failing rescan only produces a warning
    eval { rescan($vmid, 1); };
    warn $@ if $@;
}
7323
# Call $func->($storage_id) once for every distinct storage that is referenced
# by a non-CD-ROM volume in $conf. Storage IDs are visited in sorted order.
sub foreach_storage_used_by_vm {
    my ($conf, $func) = @_;

    my %used_storage;

    my $collect_storage = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive);

        # with the second argument set, unparsable volume IDs yield no
        # storage ID here and are skipped
        my ($storeid) = PVE::Storage::parse_volume_id($drive->{file}, 1);
        $used_storage{$storeid} = $storeid if $storeid;
    };
    PVE::QemuConfig->foreach_volume($conf, $collect_storage);

    for my $storeid (sort keys %used_storage) {
        $func->($storeid);
    }
}
7343
# storage types for which do_snapshots_with_qemu() answers "yes" (unless the
# storage has 'krbd' set)
my $qemu_snap_storage = {
    rbd => 1,
};

# Decide whether a snapshot of $volid is to be taken by QEMU.
#
# Returns 1 if the storage type is listed in $qemu_snap_storage and does not
# have 'krbd' set, or if the volume ID ends in '.qcow2' or '.qed'. Returns
# nothing otherwise, and always for TPM state devices. Dies if the storage of
# $volid is not part of $storecfg.
sub do_snapshots_with_qemu {
    my ($storecfg, $volid, $deviceid) = @_;

    return if $deviceid =~ m/tpmstate0/;

    my $storage_name = PVE::Storage::parse_volume_id($volid);
    my $scfg = $storecfg->{ids}->{$storage_name}
        // die "could not find storage '$storage_name'\n";

    my $snapshot_capable_type = $qemu_snap_storage->{$scfg->{type}} && !$scfg->{krbd};
    return 1 if $snapshot_capable_type;

    return 1 if $volid =~ m/\.(qcow2|qed)$/;

    return;
}
7366
# Ping the guest agent of VM $vmid with a 3 second timeout.
#
# Returns 1 if the 'guest-ping' command succeeded and 0 otherwise. A warning
# with the error is printed on failure unless $nowarn is set.
sub qga_check_running {
    my ($vmid, $nowarn) = @_;

    eval { mon_cmd($vmid, "guest-ping", timeout => 3); };
    my $ping_err = $@;

    return 1 if !$ping_err;

    warn "Qemu Guest Agent is not running - $ping_err" if !$nowarn;
    return 0;
}
7377
# Convert the volumes of VM $vmid into base volumes.
#
# CD-ROM drives and volumes without the 'template' storage feature are
# skipped. If $disk is given, only the drive with that config key is
# converted. The config is written after every converted volume.
sub template_create {
    my ($vmid, $conf, $disk) = @_;

    my $storecfg = PVE::Storage::config();

    my $convert_to_base = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);
        return if $disk && $ds ne $disk;

        my $src_volid = $drive->{file};
        return if !PVE::Storage::volume_has_feature($storecfg, 'template', $src_volid);

        # point the drive at the new base volume and persist that right away
        $drive->{file} = PVE::Storage::vdisk_create_base($storecfg, $src_volid);
        $conf->{$ds} = print_drive($drive);
        PVE::QemuConfig->write_config($vmid, $conf);
    };

    PVE::QemuConfig->foreach_volume($conf, $convert_to_base);
}
7398
# Translate an 'iscsi://<portal>/<target>/<lun>' URL into the option string
# form ('file.driver=iscsi,...,driver=raw') that qemu_img_convert() passes to
# qemu-img together with --image-opts / --target-image-opts.
#
# Dies if $path does not have the expected format.
sub convert_iscsi_path {
    my ($path) = @_;

    if ($path =~ m|^iscsi://([^/]+)/([^/]+)/(.+)$|) {
        my $portal = $1;
        my $target = $2;
        my $lun = $3;

        my $initiator_name = get_initiator_name();

        return "file.driver=iscsi,file.transport=tcp,file.initiator-name=$initiator_name,".
            "file.portal=$portal,file.target=$target,file.lun=$lun,driver=raw";
    }

    die "cannot convert iscsi path '$path', unknown format\n";
}
7415
# Copy an image with 'qemu-img convert' into an already existing volume.
#
# Parameters:
#   $src_volid           - source volume ID, or path of an existing file or
#                          block device
#   $dst_volid           - destination volume ID (must be a valid volume ID)
#   $size                - size used to compute the progress output
#   $snapname            - optional snapshot of the source to copy from
#   $is_zero_initialized - if true, the target is wrapped in the 'zeroinit:'
#                          driver (not done for iSCSI targets)
#
# Dies with "copy failed: ..." if qemu-img fails.
sub qemu_img_convert {
    my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized) = @_;

    my $storecfg = PVE::Storage::config();
    my ($src_storeid, $src_volname) = PVE::Storage::parse_volume_id($src_volid, 1);
    my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid, 1);

    die "destination '$dst_volid' is not a valid volid for qemu-img convert\n" if !$dst_storeid;

    my $cachemode;
    my $src_path;
    my $src_is_iscsi = 0;
    my $src_format;

    if ($src_storeid) {
        # source is a storage volume
        PVE::Storage::activate_volumes($storecfg, [$src_volid], $snapname);
        my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
        $src_format = qemu_img_format($src_scfg, $src_volname);
        $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
        $src_is_iscsi = ($src_path =~ m|^iscsi://|);
        $cachemode = 'none' if $src_scfg->{type} eq 'zfspool';
    } elsif (-f $src_volid || -b $src_volid) {
        # source is a plain file or block device, take the format from its extension
        $src_path = $src_volid;
        if ($src_path =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/) {
            $src_format = $1;
        }
    }

    die "source '$src_volid' is not a valid volid nor path for qemu-img convert\n" if !$src_path;

    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
    my $dst_format = qemu_img_format($dst_scfg, $dst_volname);
    my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
    my $dst_is_iscsi = ($dst_path =~ m|^iscsi://|);

    my $cmd = [];
    # '-n' because the target volume already exists
    push @$cmd, '/usr/bin/qemu-img', 'convert', '-p', '-n';
    push @$cmd, '-l', "snapshot.name=$snapname"
        if $snapname && $src_format && $src_format eq "qcow2";
    push @$cmd, '-t', 'none' if $dst_scfg->{type} eq 'zfspool';
    push @$cmd, '-T', $cachemode if defined($cachemode);

    # iSCSI paths have to be passed in option string form
    if ($src_is_iscsi) {
        push @$cmd, '--image-opts';
        $src_path = convert_iscsi_path($src_path);
    } elsif ($src_format) {
        push @$cmd, '-f', $src_format;
    }

    if ($dst_is_iscsi) {
        push @$cmd, '--target-image-opts';
        $dst_path = convert_iscsi_path($dst_path);
    } else {
        push @$cmd, '-O', $dst_format;
    }

    push @$cmd, $src_path;

    if (!$dst_is_iscsi && $is_zero_initialized) {
        push @$cmd, "zeroinit:$dst_path";
    } else {
        push @$cmd, $dst_path;
    }

    # turn the progress lines ('(<percent>/100%)') of qemu-img into transferred amounts
    my $parser = sub {
        my $line = shift;
        if($line =~ m/\((\S+)\/100\%\)/){
            my $percent = $1;
            my $transferred = int($size * $percent / 100);
            my $total_h = render_bytes($size, 1);
            my $transferred_h = render_bytes($transferred, 1);

            print "transferred $transferred_h of $total_h ($percent%)\n";
        }

    };

    eval { run_command($cmd, timeout => undef, outfunc => $parser); };
    my $err = $@;
    die "copy failed: $err" if $err;
}
7497
# Determine the image format of volume $volname on the storage described by
# $scfg: the file extension if the storage has a 'path' and the extension is
# one of the formats in $PVE::QemuServer::Drive::QEMU_FORMAT_RE, else "raw".
sub qemu_img_format {
    my ($scfg, $volname) = @_;

    return "raw" if !$scfg->{path};

    return $1 if $volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/;

    return "raw";
}
7507
# Start a 'drive-mirror' block job for drive $drive of VM $vmid and hand over
# to qemu_drive_mirror_monitor().
#
# Parameters:
#   $vmid                - VM the drive belongs to
#   $drive               - config key of the drive (device is "drive-$drive")
#   $dst_volid           - target volume ID, or an 'nbd:' URI
#   $vmiddst             - target VM ID, passed on to the monitor
#   $is_zero_initialized - wrap a volume target in the 'zeroinit:' driver
#   $jobs                - hash of block jobs, the new job is added to it
#   $completion, $qga    - passed on to qemu_drive_mirror_monitor()
#   $bwlimit             - optional limit, multiplied by 1024 for the 'speed' option
#   $src_bitmap          - optional dirty bitmap for an incremental mirror
#
# If starting the job fails, all jobs in $jobs are cancelled and the sub dies
# with "mirroring error: ...".
sub qemu_drive_mirror {
    my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit, $src_bitmap) = @_;

    $jobs ||= {};

    my $device = "drive-$drive";
    $jobs->{$device} = {};

    my ($qemu_target, $format);
    if ($dst_volid =~ /^nbd:/) {
        ($qemu_target, $format) = ($dst_volid, "nbd");
    } else {
        my $storecfg = PVE::Storage::config();
        my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid);

        my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

        $format = qemu_img_format($dst_scfg, $dst_volname);

        my $dst_path = PVE::Storage::path($storecfg, $dst_volid);

        $qemu_target = $dst_path;
        $qemu_target = "zeroinit:$dst_path" if $is_zero_initialized;
    }

    my $opts = {
        timeout => 10,
        device => $device,
        mode => "existing",
        sync => "full",
        target => $qemu_target,
    };
    $opts->{format} = $format if $format;

    if (defined($src_bitmap)) {
        $opts->{sync} = 'incremental';
        $opts->{bitmap} = $src_bitmap;
        print "drive mirror re-using dirty bitmap '$src_bitmap'\n";
    }

    if (!defined($bwlimit)) {
        print "drive mirror is starting for $device\n";
    } else {
        $opts->{speed} = $bwlimit * 1024;
        print "drive mirror is starting for $device with bandwidth limit: ${bwlimit} KB/s\n";
    }

    # if a job already runs for this device we get an error, catch it for cleanup
    eval { mon_cmd($vmid, "drive-mirror", %$opts); };
    my $start_err = $@;
    if ($start_err) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@;
        die "mirroring error: $start_err\n";
    }

    qemu_drive_mirror_monitor ($vmid, $vmiddst, $jobs, $completion, $qga);
}
7559
# Poll the block jobs in $jobs of VM $vmid once per second until they are
# done, printing progress, and finish them according to $completion.
#
# Parameters:
#   $vmid    - VM the jobs run in
#   $vmiddst - target VM ID; if set and different from $vmid, the ready jobs
#              are cancelled instead of completed (the disk is not switched)
#   $jobs    - hash keyed by job/device ID; entries are removed when a job is
#              finished and get 'ready'/'complete' flags set while polling
#   $qga     - if true and the guest agent responds, the guest filesystems are
#              frozen around the cancel, otherwise the VM is suspended
#   $op      - block job type to watch, defaults to "mirror"
#
# $completion can be either
# 'complete': wait until all jobs are ready, block-job-complete them (default)
# 'cancel': wait until all jobs are ready, block-job-cancel them
# 'skip': wait until all jobs are ready, return with block jobs in ready state
# 'auto': wait until all jobs disappear, only use for jobs which complete automatically
#
# On any error the remaining jobs are cancelled and the sub dies with
# "block job ($op) error: ...".
sub qemu_drive_mirror_monitor {
    my ($vmid, $vmiddst, $jobs, $completion, $qga, $op) = @_;

    $completion //= 'complete';
    $op //= "mirror";

    eval {
        # number of failed completion attempts ("cannot be completed")
        my $err_complete = 0;

        my $starttime = time ();
        while (1) {
            die "block job ('$op') timed out\n" if $err_complete > 300;

            my $stats = mon_cmd($vmid, "query-block-jobs");
            my $ctime = time();

            # index the jobs of the watched type by device
            my $running_jobs = {};
            for my $stat (@$stats) {
                next if $stat->{type} ne $op;
                $running_jobs->{$stat->{device}} = $stat;
            }

            my $readycounter = 0;

            for my $job_id (sort keys %$jobs) {
                my $job = $running_jobs->{$job_id};

                # a job that vanished counts as finished if we completed it
                # before, or if it is expected to finish on its own ('auto')
                my $vanished = !defined($job);
                my $complete = defined($jobs->{$job_id}->{complete}) && $vanished;
                if($complete || ($vanished && $completion eq 'auto')) {
                    print "$job_id: $op-job finished\n";
                    delete $jobs->{$job_id};
                    next;
                }

                # vanished without being expected to -> treat as cancelled
                die "$job_id: '$op' has been cancelled\n" if !defined($job);

                my $busy = $job->{busy};
                my $ready = $job->{ready};
                if (my $total = $job->{len}) {
                    my $transferred = $job->{offset} || 0;
                    my $remaining = $total - $transferred;
                    my $percent = sprintf "%.2f", ($transferred * 100 / $total);

                    my $duration = $ctime - $starttime;
                    my $total_h = render_bytes($total, 1);
                    my $transferred_h = render_bytes($transferred, 1);

                    my $status = sprintf(
                        "transferred $transferred_h of $total_h ($percent%%) in %s",
                        render_duration($duration),
                    );

                    if ($ready) {
                        if ($busy) {
                            $status .= ", still busy"; # shouldn't even happen? but mirror is weird
                        } else {
                            $status .= ", ready";
                        }
                    }
                    # stop printing progress once the job was seen as ready
                    print "$job_id: $status\n" if !$jobs->{$job_id}->{ready};
                    $jobs->{$job_id}->{ready} = $ready;
                }

                $readycounter++ if $job->{ready};
            }

            last if scalar(keys %$jobs) == 0;

            if ($readycounter == scalar(keys %$jobs)) {
                print "all '$op' jobs are ready\n";

                # do the complete later (or has already been done)
                last if $completion eq 'skip' || $completion eq 'auto';

                if ($vmiddst && $vmiddst != $vmid) {
                    # quiesce the guest: freeze the filesystems if the agent
                    # responds, suspend the VM otherwise
                    my $agent_running = $qga && qga_check_running($vmid);
                    if ($agent_running) {
                        print "freeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-freeze"); };
                        warn $@ if $@;
                    } else {
                        print "suspend vm\n";
                        eval { PVE::QemuServer::vm_suspend($vmid, 1); };
                        warn $@ if $@;
                    }

                    # if we clone a disk for a new target vm, we don't switch the disk
                    PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs);

                    if ($agent_running) {
                        print "unfreeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-thaw"); };
                        warn $@ if $@;
                    } else {
                        print "resume vm\n";
                        eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
                        warn $@ if $@;
                    }

                    last;
                } else {

                    for my $job_id (sort keys %$jobs) {
                        # try to switch the disk if source and destination are on the same guest
                        print "$job_id: Completing block job_id...\n";

                        # note: this shadows the outer $op (job type) with the
                        # QMP command to run
                        my $op;
                        if ($completion eq 'complete') {
                            $op = 'block-job-complete';
                        } elsif ($completion eq 'cancel') {
                            $op = 'block-job-cancel';
                        } else {
                            die "invalid completion value: $completion\n";
                        }
                        eval { mon_cmd($vmid, $op, device => $job_id) };
                        # only "cannot be completed" errors count as failed
                        # attempt, anything else marks the job as complete
                        if ($@ =~ m/cannot be completed/) {
                            print "$job_id: block job cannot be completed, trying again.\n";
                            $err_complete++;
                        }else {
                            print "$job_id: Completed successfully.\n";
                            $jobs->{$job_id}->{complete} = 1;
                        }
                    }
                }
            }
            sleep 1;
        }
    };
    my $err = $@;

    if ($err) {
        # best effort cleanup, errors of the cancel itself are ignored
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        die "block job ($op) error: $err";
    }
}
7701
# Cancel all block jobs listed in $jobs (hash keyed by device/job ID) of VM
# $vmid and wait, polling once per second, until none of them is reported by
# 'query-block-jobs' anymore. Finished jobs are removed from $jobs.
sub qemu_blockjobs_cancel {
    my ($vmid, $jobs) = @_;

    for my $job_id (keys %$jobs) {
        print "$job_id: Cancelling block job\n";
        # errors of the cancel request itself are ignored
        eval { mon_cmd($vmid, "block-job-cancel", device => $job_id); };
        $jobs->{$job_id}->{cancel} = 1;
    }

    while (1) {
        my $stats = mon_cmd($vmid, "query-block-jobs");

        my %still_running;
        $still_running{$_->{device}} = $_ for @$stats;

        for my $job_id (keys %$jobs) {
            next if !defined($jobs->{$job_id}->{cancel});
            next if defined($still_running{$job_id});

            print "$job_id: Done.\n";
            delete $jobs->{$job_id};
        }

        last if !scalar(keys %$jobs);

        sleep 1;
    }
}
7732
# Clone a single drive, either as linked clone or as full copy.
#
# Parameters:
#   $storecfg   - storage configuration
#   $source     - hash with 'vmid', 'running', 'drivename', 'drive' (parsed
#                 drive) and 'snapname'
#   $dest       - hash with 'vmid', 'drivename', 'efisize', 'storage' and
#                 'format'
#   $full       - create a full copy instead of a linked clone
#   $newvollist - array ref, every newly allocated volume ID is pushed to it
#   $jobs, $completion, $qga, $bwlimit
#               - passed on to qemu_drive_mirror()/qemu_drive_mirror_monitor()
#                 when the copy is done with a drive mirror
#
# Returns a copy of the source drive hash pointing to the new volume, without
# 'format' and with 'size' updated if it could be determined.
sub clone_disk {
    my ($storecfg, $source, $dest, $full, $newvollist, $jobs, $completion, $qga, $bwlimit) = @_;

    my ($vmid, $running) = $source->@{qw(vmid running)};
    my ($src_drivename, $drive, $snapname) = $source->@{qw(drivename drive snapname)};

    my ($newvmid, $dst_drivename, $efisize) = $dest->@{qw(vmid drivename efisize)};
    my ($storage, $format) = $dest->@{qw(storage format)};

    # a drive mirror is only used for full clones of the current state of a
    # running VM's drive
    my $use_drive_mirror = $full && $running && $src_drivename && !$snapname;

    if ($src_drivename && $dst_drivename && $src_drivename ne $dst_drivename) {
        die "cloning from/to EFI disk requires EFI disk\n"
            if $src_drivename eq 'efidisk0' || $dst_drivename eq 'efidisk0';
        die "cloning from/to TPM state requires TPM state\n"
            if $src_drivename eq 'tpmstate0' || $dst_drivename eq 'tpmstate0';

        # This would lead to two device nodes in QEMU pointing to the same backing image!
        die "cannot change drive name when cloning disk from/to the same VM\n"
            if $use_drive_mirror && $vmid == $newvmid;
    }

    die "cannot move TPM state while VM is running\n"
        if $use_drive_mirror && $src_drivename eq 'tpmstate0';

    my $newvolid;

    print "create " . ($full ? 'full' : 'linked') . " clone of drive ";
    print "$src_drivename " if $src_drivename;
    print "($drive->{file})\n";

    if (!$full) {
        # linked clone: the storage layer does all the work
        $newvolid = PVE::Storage::vdisk_clone($storecfg, $drive->{file}, $newvmid, $snapname);
        push @$newvollist, $newvolid;
    } else {

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        $storeid = $storage if $storage;

        my $dst_format = resolve_dst_disk_format($storecfg, $storeid, $volname, $format);

        # determine name and size of the new volume depending on the drive type
        # NOTE(review): $dst_drivename is compared with 'eq' below although the
        # check above allows it to be unset - confirm callers always pass it
        my $name = undef;
        my $size = undef;
        if (drive_is_cloudinit($drive)) {
            $name = "vm-$newvmid-cloudinit";
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            if ($scfg->{path}) {
                $name .= ".$dst_format";
            }
            $snapname = undef;
            $size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE;
        } elsif ($dst_drivename eq 'efidisk0') {
            $size = $efisize or die "internal error - need to specify EFI disk size\n";
        } elsif ($dst_drivename eq 'tpmstate0') {
            $dst_format = 'raw';
            $size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
        } else {
            ($size) = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
        }
        # presumably $size is in bytes and vdisk_alloc expects KiB - confirm
        $newvolid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $newvmid, $dst_format, $name, ($size/1024)
        );
        push @$newvollist, $newvolid;

        PVE::Storage::activate_volumes($storecfg, [$newvolid]);

        if (drive_is_cloudinit($drive)) {
            # when cloning multiple disks (e.g. during clone_vm) it might be the last disk
            # if this is the case, we have to complete any block-jobs still there from
            # previous drive-mirrors
            if (($completion eq 'complete') && (scalar(keys %$jobs) > 0)) {
                qemu_drive_mirror_monitor($vmid, $newvmid, $jobs, $completion, $qga);
            }
            # no data is copied for cloudinit drives
            goto no_data_clone;
        }

        my $sparseinit = PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $newvolid);
        if ($use_drive_mirror) {
            qemu_drive_mirror($vmid, $src_drivename, $newvolid, $newvmid, $sparseinit, $jobs,
                $completion, $qga, $bwlimit);
        } else {
            # TODO: handle bwlimits
            if ($dst_drivename eq 'efidisk0') {
                # the relevant data on the efidisk may be smaller than the source
                # e.g. on RBD/ZFS, so we use dd to copy only the amount
                # that is given by the OVMF_VARS.fd
                my $src_path = PVE::Storage::path($storecfg, $drive->{file}, $snapname);
                my $dst_path = PVE::Storage::path($storecfg, $newvolid);

                my $src_format = (PVE::Storage::parse_volname($storecfg, $drive->{file}))[6];

                # better for Ceph if block size is not too small, see bug #3324
                my $bs = 1024*1024;

                my $cmd = ['qemu-img', 'dd', '-n', '-O', $dst_format];

                if ($src_format eq 'qcow2' && $snapname) {
                    die "cannot clone qcow2 EFI disk snapshot - requires QEMU >= 6.2\n"
                        if !min_version(kvm_user_version(), 6, 2);
                    push $cmd->@*, '-l', $snapname;
                }
                push $cmd->@*, "bs=$bs", "osize=$size", "if=$src_path", "of=$dst_path";
                run_command($cmd);
            } else {
                qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit);
            }
        }
    }

no_data_clone:
    # a failure to query the size is ignored, 'size' is left untouched then
    my ($size) = eval { PVE::Storage::volume_size_info($storecfg, $newvolid, 10) };

    # the caller's drive hash is not modified, work on a deep copy
    my $disk = dclone($drive);
    delete $disk->{format};
    $disk->{file} = $newvolid;
    $disk->{size} = $size if defined($size);

    return $disk;
}
7852
# Query the QEMU version of the running VM $vmid via 'query-version' and
# return it as "<major>.<minor>".
sub get_running_qemu_version {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, "query-version");
    my $qemu = $res->{qemu};

    return join('.', $qemu->{major}, $qemu->{minor});
}
7858
# Decide whether the old (non-EFI) PXE bios files have to be used for
# $machine_type.
#
# Returns nothing if no machine type is given, otherwise the list
# ($use_old_bios_files, $machine_type). A '.pxe' suffix on the machine type
# forces the old files and is stripped from the returned machine type.
sub qemu_use_old_bios_files {
    my ($machine_type) = @_;

    return if !$machine_type;

    if ($machine_type =~ m/^(\S+)\.pxe$/) {
        return (1, $1);
    }

    # Note: kvm version < 2.4 use non-efi pxe files, and have problems when we
    # load new efi bios files on migration. So this hack is required to allow
    # live migration from qemu-2.2 to qemu-2.4, which is sometimes used when
    # upgrading from proxmox-ve-3.X to proxmox-ve 4.0
    my $version = extract_version($machine_type, kvm_user_version());
    my $needs_old_files = !min_version($version, 2, 4);

    return ($needs_old_files, $machine_type);
}
7880
# Return the size in bytes of the OVMF vars image that get_ovmf_files()
# selects for the VM config $conf.
#
# $efidisk is an optional parsed EFI disk; if it is not defined, the
# 'efidisk0' entry of $conf is parsed instead (when present). Dies if the
# vars image does not exist.
sub get_efivars_size {
    my ($conf, $efidisk) = @_;

    my $arch = get_vm_arch($conf);

    if (!defined($efidisk) && $conf->{efidisk0}) {
        $efidisk = parse_drive('efidisk0', $conf->{efidisk0});
    }

    my $smm = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $ovmf_vars = (get_ovmf_files($arch, $efidisk, $smm))[1];

    if (! -f $ovmf_vars) {
        die "uefi vars image '$ovmf_vars' not found\n";
    }

    return -s $ovmf_vars;
}
7891
# Set the 'size' of the 'efidisk0' entry in $conf to the size reported by
# get_efivars_size(). Does nothing if no EFI disk is configured.
sub update_efidisk_size {
    my ($conf) = @_;

    if (defined($conf->{efidisk0})) {
        my $efidisk = PVE::QemuServer::parse_drive('efidisk0', $conf->{efidisk0});
        $efidisk->{size} = get_efivars_size($conf);
        $conf->{efidisk0} = print_drive($efidisk);
    }

    return;
}
7903
# Set the 'size' of the 'tpmstate0' entry in $conf to the fixed TPM state
# disk size. Does nothing if no TPM state is configured, so that no bogus
# 'tpmstate0' entry gets created (same behavior as update_efidisk_size()).
#
# Returns the updated 'tpmstate0' property string, or nothing if there is no
# TPM state.
sub update_tpmstate_size {
    my ($conf) = @_;

    return if !defined($conf->{tpmstate0});

    my $disk = PVE::QemuServer::parse_drive('tpmstate0', $conf->{tpmstate0});
    $disk->{size} = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
    $conf->{tpmstate0} = print_drive($disk);

    return $conf->{tpmstate0};
}
7911
# Allocate a new EFI vars volume for VM $vmid on storage $storeid and fill it
# with the default vars image selected by get_ovmf_files().
#
# Returns the list ($volid, $size/1024), where $size is what
# PVE::Storage::volume_size_info() reports for the new volume. Dies if the
# default vars image does not exist.
sub create_efidisk($$$$$$$) {
    my ($storecfg, $storeid, $vmid, $fmt, $arch, $efidisk, $smm) = @_;

    my $ovmf_vars = (get_ovmf_files($arch, $efidisk, $smm))[1];
    if (! -f $ovmf_vars) {
        die "EFI vars default image not found\n";
    }

    # the volume is allocated with the size of the vars image
    my $vars_bytes = -s $ovmf_vars;
    my $vars_kb = PVE::Tools::convert_size($vars_bytes, 'b' => 'kb');

    my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $fmt, undef, $vars_kb);
    PVE::Storage::activate_volumes($storecfg, [$volid]);

    qemu_img_convert($ovmf_vars, $volid, $vars_bytes, undef, 0);

    my ($size) = PVE::Storage::volume_size_info($storecfg, $volid, 3);

    return ($volid, $size/1024);
}
7928
# Query the running VM via the 'query-iothreads' monitor command and return a
# hash reference mapping each reported iothread 'id' to its 'thread-id'.
#
# $vmid - ID of the VM to query
sub vm_iothreads_list {
    my ($vmid) = @_;

    my $reply = mon_cmd($vmid, 'query-iothreads');

    my %thread_id_of;
    for my $entry (@$reply) {
        my ($id, $thread_id) = @{$entry}{'id', 'thread-id'};
        $thread_id_of{$id} = $thread_id;
    }

    return \%thread_id_of;
}
7941
# Compute SCSI controller placement for a drive.
#
# $conf  - VM configuration hash; only $conf->{scsihw} is read
# $drive - parsed drive hash; only $drive->{index} is read
#
# Returns the list ($maxdev, $controller, $controller_prefix):
#   $maxdev            - 7 if scsihw is unset or starts with 'lsi',
#                        1 for 'virtio-scsi-single', 256 for everything else
#   $controller        - int(drive index / $maxdev)
#   $controller_prefix - 'virtioscsi' for 'virtio-scsi-single', else 'scsihw'
sub scsihw_infos {
    my ($conf, $drive) = @_;

    # treat an unset (or otherwise false) value like the empty string
    my $scsihw = $conf->{scsihw} || '';
    my $is_single = $scsihw eq 'virtio-scsi-single';

    my $maxdev
        = ($scsihw eq '' || $scsihw =~ m/^lsi/) ? 7
        : $is_single                            ? 1
        :                                         256;

    my $controller = int($drive->{index} / $maxdev);

    my $controller_prefix = "scsihw";
    $controller_prefix = "virtioscsi" if $is_single;

    return ($maxdev, $controller, $controller_prefix);
}
7962
# Pick the disk format to use on a target storage.
#
# $storecfg    - storage configuration
# $storeid     - ID of the target storage
# $src_volname - optional source volume name, used as a hint if $format is unset
# $format      - optional requested format
#
# Without $format the format of the source volume (via qemu_img_format) is used;
# without a source volume either, the storage default is returned right away.
# A format that is not in the storage's list of valid formats is replaced by the
# storage default.
sub resolve_dst_disk_format {
    my ($storecfg, $storeid, $src_volname, $format) = @_;

    my ($default_format, $valid_formats) =
        PVE::Storage::storage_default_format($storecfg, $storeid);

    if (!$format) {
        # nothing requested and no source to take a hint from
        return $default_format if !$src_volname;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        $format = qemu_img_format($scfg, $src_volname);
    }

    # fall back to the default if the storage cannot handle the wanted format
    my $match_count = grep { $_ eq $format } @$valid_formats;

    return $match_count ? $format : $default_format;
}
7982
# NOTE: if this logic changes, please update docs & possibly gui logic
#
# Choose the storage to use for the VM state. Order of preference:
#   1. $conf->{vmstatestorage} if set
#   2. a shared storage used by the VM
#   3. a non-shared storage used by the VM
#   4. the storage named 'local'
# Within 2. and 3. a storage with a 'path' setting (file based) overrides an
# earlier candidate.
#
# $conf     - VM configuration hash
# $storecfg - storage configuration
sub find_vmstate_storage {
    my ($conf, $storecfg) = @_;

    if (my $configured = $conf->{vmstatestorage}) {
        return $configured;
    }

    my %candidate; # best storage seen so far, keyed by 'shared' / 'local'

    foreach_storage_used_by_vm($conf, sub {
        my ($sid) = @_;

        my $scfg = PVE::Storage::storage_config($storecfg, $sid);
        my $kind = $scfg->{shared} ? 'shared' : 'local';

        # prefer file based storage
        if (!$candidate{$kind} || $scfg->{path}) {
            $candidate{$kind} = $sid;
        }
    });

    return $candidate{shared} // $candidate{local} // 'local';
}
8006
# Create a new UUID with UUID::generate and return the string form produced by
# UUID::unparse.
sub generate_uuid {
    my $raw_uuid;
    UUID::generate($raw_uuid);

    my $text_uuid;
    UUID::unparse($raw_uuid, $text_uuid);

    return $text_uuid;
}
8013
# Return a freshly generated UUID prefixed with 'uuid=', i.e. the string
# "uuid=<uuid>".
sub generate_smbios1_uuid {
    my $uuid = generate_uuid();

    return "uuid=$uuid";
}
8017
# Send the 'nbd-server-stop' monitor command to the VM and return its result.
#
# $vmid - ID of the VM
sub nbd_stop {
    my ($vmid) = @_;

    return mon_cmd($vmid, 'nbd-server-stop');
}
8023
# Create (or truncate) the empty trigger file /run/qemu-server/<vmid>.reboot.
#
# $vmid - ID of the VM
#
# Dies if the file cannot be opened for writing. Returns the result of close().
sub create_reboot_request {
    my ($vmid) = @_;

    my $trigger_file = "/run/qemu-server/$vmid.reboot";

    my $fh;
    if (!open($fh, '>', $trigger_file)) {
        die "failed to create reboot trigger file: $!\n";
    }

    return close($fh);
}
8030
# Remove the trigger file /run/qemu-server/<vmid>.reboot.
#
# $vmid - ID of the VM
#
# Returns the result of unlink(): true if the file was removed, false if there
# was none. Dies if removal failed for any reason other than ENOENT.
sub clear_reboot_request {
    my ($vmid) = @_;

    my $removed = unlink("/run/qemu-server/$vmid.reboot");

    # a missing file simply means there was no pending request
    if (!$removed && $! != POSIX::ENOENT) {
        die "could not remove reboot request for $vmid: $!";
    }

    return $removed;
}
8042
# Translate a legacy boot string (one character per device class) into a hash
# reference mapping device names to boot indices.
#
# $conf    - VM configuration hash
# $bootcfg - optional parsed boot property; $bootcfg->{legacy} is used if set,
#            otherwise the default of the 'legacy' entry in $boot_fmt
#
# The character at position N (1-based) gets the base index N*100. Handled
# characters, as far as this function is concerned:
#   d - volumes for which drive_is_cdrom($drive, 1) is true, each getting the
#       next free index of that class
#   c - all other volumes; only the one named by $conf->{bootdisk} gets an
#       entry, but the class index advances for every such volume
#   n - configured netN devices, each getting the next free index of that class
sub bootorder_from_legacy {
    my ($conf, $bootcfg) = @_;

    my $legacy = $bootcfg->{legacy} || $boot_fmt->{legacy}->{default};

    # next boot index to hand out, per legacy character
    my %next_index;
    my $position = 0;
    for my $letter (split(//, $legacy)) {
        $position++;
        $next_index{$letter} = $position * 100;
    }

    my %bootorder;

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        if (drive_is_cdrom($drive, 1)) {
            if ($next_index{d}) {
                $bootorder{$ds} = $next_index{d}++;
            }
        } elsif ($next_index{c}) {
            my $is_bootdisk = $conf->{bootdisk} && $conf->{bootdisk} eq $ds;
            $bootorder{$ds} = $next_index{c} if $is_bootdisk;
            # advances for every non-cdrom volume, bootdisk or not
            $next_index{c}++;
        }
    });

    if ($next_index{n}) {
        for my $netname (map { "net$_" } 0 .. $MAX_NETS - 1) {
            next if !$conf->{$netname};
            $bootorder{$netname} = $next_index{n}++;
        }
    }

    return \%bootorder;
}
8082
# Generate default device list for 'boot: order=' property. Matches legacy
# default boot order, but with explicit device names. This is important, since
# the fallback for when neither 'order' nor the old format is specified relies
# on 'bootorder_from_legacy' above, and it would be confusing if this diverges.
#
# $conf - VM configuration hash
#
# Returns an array reference holding, in this order and only if present: the
# result of resolve_first_disk($conf, 0), the result of
# resolve_first_disk($conf, 1) and the name of the first configured netN device.
sub get_default_bootdevices {
    my ($conf) = @_;

    my @devices;

    # harddisk (0) first, then cdrom (1)
    for my $want_cdrom (0, 1) {
        my $first = PVE::QemuServer::Drive::resolve_first_disk($conf, $want_cdrom);
        push @devices, $first if $first;
    }

    # network: only the first configured device is added
    for my $idx (0 .. $MAX_NETS - 1) {
        my $netname = "net$idx";
        if ($conf->{$netname}) {
            push @devices, $netname;
            last;
        }
    }

    return \@devices;
}
8110
# Return a hash reference mapping device names to boot indices for a VM.
#
# $conf - VM configuration hash
#
# Without a 'boot' property, or with one that parses to nothing or contains a
# 'legacy' entry, the result comes from bootorder_from_legacy(). With an 'order'
# entry the listed devices are numbered consecutively starting at 100. In any
# other case the returned hash is empty.
sub device_bootorder {
    my ($conf) = @_;

    my $boot_prop = $conf->{boot};
    return bootorder_from_legacy($conf) if !defined($boot_prop);

    my $boot = parse_property_string($boot_fmt, $boot_prop);

    my $use_legacy = !defined($boot) || $boot->{legacy};
    if ($use_legacy) {
        my $legacy_order = bootorder_from_legacy($conf, $boot);
        return $legacy_order;
    }

    my %bootorder;
    if ($boot->{order}) {
        # start at 100 to allow user to insert devices before us with -args
        my $index = 100;
        for my $dev (PVE::Tools::split_list($boot->{order})) {
            $bootorder{$dev} = $index;
            $index++;
        }
    }

    return \%bootorder;
}
8130
# Connect to the qmeventd socket at /var/run/qmeventd.sock and send a JSON
# handshake of the form {"vzdump": {"vmid": "<vmid>"}}.
#
# $vmid - ID of the VM
#
# A failed connection attempt is retried after 25ms if it failed with EINTR or
# EAGAIN. Dies on any other error, or once the fifth attempt has failed.
#
# Returns the connected socket handle; the caller is expected to close it once
# the inhibit is no longer required.
sub register_qmeventd_handle {
    my ($vmid) = @_;

    my $socket_path = "/var/run/qmeventd.sock";
    my $attempts = 0;
    my $sock;

    while (1) {
        $attempts++;

        $sock = IO::Socket::UNIX->new(Peer => $socket_path, Blocking => 0, Timeout => 1);
        last if $sock;

        my $retryable = $! == EINTR || $! == EAGAIN;
        if (!$retryable) {
            die "unable to connect to qmeventd socket (vmid: $vmid) - $!\n";
        }

        if ($attempts > 4) {
            die "unable to connect to qmeventd socket (vmid: $vmid) - timeout after $attempts retries\n";
        }

        usleep(25000);
    }

    # send handshake to mark VM as backing up
    print {$sock} to_json({ vzdump => { vmid => "$vmid" } });

    # return handle to be closed later when inhibit is no longer required
    return $sock;
}
8158
8159 # bash completion helper
8160
# Completion helper: list volume IDs of backup archives.
#
# $cmdname, $pname - unused here
# $cvalue          - value typed so far; a leading '<storeid>:' limits the
#                    lookup to that storage
#
# Only entries whose 'format' matches 'vma.<compressor>' (per
# PVE::Storage::Plugin::COMPRESSOR_RE) and that have a defined 'volid' are
# returned. Returns an array reference.
sub complete_backup_archives {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    # stays undefined if no storage prefix was typed yet
    my ($storeid) = $cvalue =~ m/^([^:]+):/;

    my $data = PVE::Storage::template_list($cfg, $storeid, 'backup');

    my @volids;
    for my $sid (keys %$data) {
        for my $item (@{ $data->{$sid} }) {
            next unless $item->{format} =~ m/^vma\.(${\PVE::Storage::Plugin::COMPRESSOR_RE})$/;
            next unless defined($item->{volid});
            push @volids, $item->{volid};
        }
    }

    return \@volids;
}
8184
# Completion helper: return an array reference of VM IDs from vmstatus().
#
# $running - undefined: return all IDs
#            true:      only non-template VMs whose status is 'running'
#            false:     only non-template VMs whose status is not 'running'
my $complete_vmid_full = sub {
    my ($running) = @_;

    my $status_of = vmstatus();

    my @ids = grep {
        my $vm = $status_of->{$_};

        my $keep = 1;
        if (defined($running)) {
            if ($vm->{template}) {
                $keep = 0;
            } elsif ($running) {
                $keep = $vm->{status} eq 'running';
            } else {
                $keep = $vm->{status} ne 'running';
            }
        }

        $keep;
    } keys %$status_of;

    return \@ids;
};
8204
# Completion helper: all VM IDs, without any filtering.
sub complete_vmid {
    return $complete_vmid_full->();
}
8208
# Completion helper: IDs of non-template VMs whose status is not 'running'.
sub complete_vmid_stopped {
    return $complete_vmid_full->(0);
}
8212
# Completion helper: IDs of non-template VMs whose status is 'running'.
sub complete_vmid_running {
    return $complete_vmid_full->(1);
}
8216
# Completion helper: return an array reference with the IDs of all storages
# that pass PVE::Storage::storage_check_enabled (no node given) and have the
# 'images' content type configured.
sub complete_storage {
    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @storages;
    for my $sid (keys %$ids) {
        my $enabled = PVE::Storage::storage_check_enabled($cfg, $sid, undef, 1);
        next unless $enabled && $ids->{$sid}->{content}->{images};

        push @storages, $sid;
    }

    return \@storages;
}
8231
# Completion helper: return an array reference with the IDs of all storages
# that pass PVE::Storage::storage_check_enabled for the target node and have
# the 'images' content type configured.
#
# $cmd, $param, $current_value - unused here
# $all_args - array reference of the arguments given so far; the element at
#             index 1 is used as target node (presumably the node argument of
#             the migrate command - confirm against the CLI definition)
sub complete_migration_storage {
    my ($cmd, $param, $current_value, $all_args) = @_;

    # plain element access instead of the former one-element slice @$all_args[1]
    my $targetnode = $all_args->[1];

    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my $res = [];
    foreach my $sid (keys %$ids) {
        next if !PVE::Storage::storage_check_enabled($cfg, $sid, $targetnode, 1);
        next if !$ids->{$sid}->{content}->{images};
        push @$res, $sid;
    }

    return $res;
}
8249
# Check whether the 'query-status' monitor command reports the VM as 'paused'.
#
# $vmid - ID of the VM
#
# Errors from the config check or the monitor command are not propagated; they
# are emitted as a warning and a false value is returned.
sub vm_is_paused {
    my ($vmid) = @_;

    my $status = eval {
        PVE::QemuConfig::assert_config_exists_on_node($vmid);
        mon_cmd($vmid, "query-status");
    };
    if (my $err = $@) {
        warn "$err\n";
    }

    # no usable answer from the monitor
    return $status if !$status;

    return $status->{status} eq "paused";
}
8259
# Verify that the storage holding a volume is configured for the content type
# of that volume.
#
# $storecfg - storage configuration
# $vol      - volume ID
#
# Returns 1 on success. Dies if the content type reported by
# PVE::Storage::parse_volname is not set in the storage's 'content' setting.
sub check_volume_storage_type {
    my ($storecfg, $vol) = @_;

    my ($storeid) = PVE::Storage::parse_volume_id($vol);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
    my ($vtype) = PVE::Storage::parse_volname($storecfg, $vol);

    if (!$scfg->{content}->{$vtype}) {
        die "storage '$storeid' does not support content-type '$vtype'\n";
    }

    return 1;
}
8272
8273 1;