]> git.proxmox.com Git - qemu-server.git/blob - PVE/QemuServer.pm
move get_cpuunits helper to guest-common
[qemu-server.git] / PVE / QemuServer.pm
1 package PVE::QemuServer;
2
3 use strict;
4 use warnings;
5
6 use Cwd 'abs_path';
7 use Digest::SHA;
8 use Fcntl ':flock';
9 use Fcntl;
10 use File::Basename;
11 use File::Copy qw(copy);
12 use File::Path;
13 use File::stat;
14 use Getopt::Long;
15 use IO::Dir;
16 use IO::File;
17 use IO::Handle;
18 use IO::Select;
19 use IO::Socket::UNIX;
20 use IPC::Open3;
21 use JSON;
22 use MIME::Base64;
23 use POSIX;
24 use Storable qw(dclone);
25 use Time::HiRes qw(gettimeofday usleep);
26 use URI::Escape;
27 use UUID;
28
29 use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file);
30 use PVE::CGroup;
31 use PVE::DataCenterConfig;
32 use PVE::Exception qw(raise raise_param_exc);
33 use PVE::Format qw(render_duration render_bytes);
34 use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
35 use PVE::INotify;
36 use PVE::JSONSchema qw(get_standard_option parse_property_string);
37 use PVE::ProcFSTools;
38 use PVE::PBSClient;
39 use PVE::RESTEnvironment qw(log_warn);
40 use PVE::RPCEnvironment;
41 use PVE::Storage;
42 use PVE::SysFSTools;
43 use PVE::Systemd;
44 use PVE::Tools qw(run_command file_read_firstline file_get_contents dir_glob_foreach get_host_arch $IPV6RE);
45
46 use PVE::QMPClient;
47 use PVE::QemuConfig;
48 use PVE::QemuServer::Helpers qw(min_version config_aware_timeout);
49 use PVE::QemuServer::Cloudinit;
50 use PVE::QemuServer::CGroup;
51 use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options);
52 use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom drive_is_read_only parse_drive print_drive);
53 use PVE::QemuServer::Machine;
54 use PVE::QemuServer::Memory;
55 use PVE::QemuServer::Monitor qw(mon_cmd);
56 use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
57 use PVE::QemuServer::USB qw(parse_usb_device);
58
# Optional SDN support: $have_sdn ends up true only if the SDN zones module could be loaded,
# and stays undefined when the require fails.
my $have_sdn = eval {
    require PVE::Network::SDN::Zones;
    1;
};
64
65 my $EDK2_FW_BASE = '/usr/share/pve-edk2-firmware/';
66 my $OVMF = {
67 x86_64 => {
68 '4m-no-smm' => [
69 "$EDK2_FW_BASE/OVMF_CODE_4M.fd",
70 "$EDK2_FW_BASE/OVMF_VARS_4M.fd",
71 ],
72 '4m-no-smm-ms' => [
73 "$EDK2_FW_BASE/OVMF_CODE_4M.fd",
74 "$EDK2_FW_BASE/OVMF_VARS_4M.ms.fd",
75 ],
76 '4m' => [
77 "$EDK2_FW_BASE/OVMF_CODE_4M.secboot.fd",
78 "$EDK2_FW_BASE/OVMF_VARS_4M.fd",
79 ],
80 '4m-ms' => [
81 "$EDK2_FW_BASE/OVMF_CODE_4M.secboot.fd",
82 "$EDK2_FW_BASE/OVMF_VARS_4M.ms.fd",
83 ],
84 default => [
85 "$EDK2_FW_BASE/OVMF_CODE.fd",
86 "$EDK2_FW_BASE/OVMF_VARS.fd",
87 ],
88 },
89 aarch64 => {
90 default => [
91 "$EDK2_FW_BASE/AAVMF_CODE.fd",
92 "$EDK2_FW_BASE/AAVMF_VARS.fd",
93 ],
94 },
95 };
96
97 my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
98
# Note about locking: we use flock on the config file to protect against concurrent actions.
# Additionally, we have a 'lock' setting in the config file. This can be set to 'migrate',
# 'backup', 'snapshot' or 'rollback'. Most actions are not allowed when such a lock is set.
# But you can ignore this kind of lock with the --skiplock flag.
103
104 cfs_register_file('/qemu-server/',
105 \&parse_vm_config,
106 \&write_vm_config);
107
108 PVE::JSONSchema::register_standard_option('pve-qm-stateuri', {
109 description => "Some command save/restore state from this location.",
110 type => 'string',
111 maxLength => 128,
112 optional => 1,
113 });
114
115 PVE::JSONSchema::register_standard_option('pve-qemu-machine', {
116 description => "Specifies the Qemu machine type.",
117 type => 'string',
118 pattern => '(pc|pc(-i440fx)?-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|q35|pc-q35-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|virt(?:-\d+(\.\d+)+)?(\+pve\d+)?)',
119 maxLength => 40,
120 optional => 1,
121 });
122
123 #no warnings 'redefine';
124
# Cached name of the local node; filled in lazily by nodename().
my $nodename_cache;

# Returns the local node name as reported by PVE::INotify::nodename(). The lookup is only
# performed while no defined value has been cached yet; later calls return the cached value.
sub nodename {
    return $nodename_cache if defined($nodename_cache);

    $nodename_cache = PVE::INotify::nodename();
    return $nodename_cache;
}
130
131 my $watchdog_fmt = {
132 model => {
133 default_key => 1,
134 type => 'string',
135 enum => [qw(i6300esb ib700)],
136 description => "Watchdog type to emulate.",
137 default => 'i6300esb',
138 optional => 1,
139 },
140 action => {
141 type => 'string',
142 enum => [qw(reset shutdown poweroff pause debug none)],
143 description => "The action to perform if after activation the guest fails to poll the watchdog in time.",
144 optional => 1,
145 },
146 };
147 PVE::JSONSchema::register_format('pve-qm-watchdog', $watchdog_fmt);
148
149 my $agent_fmt = {
150 enabled => {
151 description => "Enable/disable communication with a Qemu Guest Agent (QGA) running in the VM.",
152 type => 'boolean',
153 default => 0,
154 default_key => 1,
155 },
156 fstrim_cloned_disks => {
157 description => "Run fstrim after moving a disk or migrating the VM.",
158 type => 'boolean',
159 optional => 1,
160 default => 0
161 },
162 type => {
163 description => "Select the agent type",
164 type => 'string',
165 default => 'virtio',
166 optional => 1,
167 enum => [qw(virtio isa)],
168 },
169 };
170
171 my $vga_fmt = {
172 type => {
173 description => "Select the VGA type.",
174 type => 'string',
175 default => 'std',
176 optional => 1,
177 default_key => 1,
178 enum => [qw(cirrus qxl qxl2 qxl3 qxl4 none serial0 serial1 serial2 serial3 std virtio virtio-gl vmware)],
179 },
180 memory => {
181 description => "Sets the VGA memory (in MiB). Has no effect with serial display.",
182 type => 'integer',
183 optional => 1,
184 minimum => 4,
185 maximum => 512,
186 },
187 };
188
189 my $ivshmem_fmt = {
190 size => {
191 type => 'integer',
192 minimum => 1,
193 description => "The size of the file in MB.",
194 },
195 name => {
196 type => 'string',
197 pattern => '[a-zA-Z0-9\-]+',
198 optional => 1,
199 format_description => 'string',
200 description => "The name of the file. Will be prefixed with 'pve-shm-'. Default is the VMID. Will be deleted when the VM is stopped.",
201 },
202 };
203
204 my $audio_fmt = {
205 device => {
206 type => 'string',
207 enum => [qw(ich9-intel-hda intel-hda AC97)],
208 description => "Configure an audio device."
209 },
210 driver => {
211 type => 'string',
212 enum => ['spice', 'none'],
213 default => 'spice',
214 optional => 1,
215 description => "Driver backend for the audio device."
216 },
217 };
218
219 my $spice_enhancements_fmt = {
220 foldersharing => {
221 type => 'boolean',
222 optional => 1,
223 default => '0',
224 description => "Enable folder sharing via SPICE. Needs Spice-WebDAV daemon installed in the VM."
225 },
226 videostreaming => {
227 type => 'string',
228 enum => ['off', 'all', 'filter'],
229 default => 'off',
230 optional => 1,
231 description => "Enable video streaming. Uses compression for detected video streams."
232 },
233 };
234
235 my $rng_fmt = {
236 source => {
237 type => 'string',
238 enum => ['/dev/urandom', '/dev/random', '/dev/hwrng'],
239 default_key => 1,
240 description => "The file on the host to gather entropy from. In most cases '/dev/urandom'"
241 ." should be preferred over '/dev/random' to avoid entropy-starvation issues on the"
242 ." host. Using urandom does *not* decrease security in any meaningful way, as it's"
243 ." still seeded from real entropy, and the bytes provided will most likely be mixed"
244 ." with real entropy on the guest as well. '/dev/hwrng' can be used to pass through"
245 ." a hardware RNG from the host.",
246 },
247 max_bytes => {
248 type => 'integer',
249 description => "Maximum bytes of entropy allowed to get injected into the guest every"
250 ." 'period' milliseconds. Prefer a lower value when using '/dev/random' as source. Use"
251 ." `0` to disable limiting (potentially dangerous!).",
252 optional => 1,
253
254 # default is 1 KiB/s, provides enough entropy to the guest to avoid boot-starvation issues
255 # (e.g. systemd etc...) while allowing no chance of overwhelming the host, provided we're
256 # reading from /dev/urandom
257 default => 1024,
258 },
259 period => {
260 type => 'integer',
261 description => "Every 'period' milliseconds the entropy-injection quota is reset, allowing"
262 ." the guest to retrieve another 'max_bytes' of entropy.",
263 optional => 1,
264 default => 1000,
265 },
266 };
267
268 my $meta_info_fmt = {
269 'ctime' => {
270 type => 'integer',
271 description => "The guest creation timestamp as UNIX epoch time",
272 minimum => 0,
273 optional => 1,
274 },
275 'creation-qemu' => {
276 type => 'string',
277 description => "The QEMU (machine) version from the time this VM was created.",
278 pattern => '\d+(\.\d+)+',
279 optional => 1,
280 },
281 };
282
283 my $confdesc = {
284 onboot => {
285 optional => 1,
286 type => 'boolean',
287 description => "Specifies whether a VM will be started during system bootup.",
288 default => 0,
289 },
290 autostart => {
291 optional => 1,
292 type => 'boolean',
293 description => "Automatic restart after crash (currently ignored).",
294 default => 0,
295 },
296 hotplug => {
297 optional => 1,
298 type => 'string', format => 'pve-hotplug-features',
299 description => "Selectively enable hotplug features. This is a comma separated list of"
300 ." hotplug features: 'network', 'disk', 'cpu', 'memory' and 'usb'. Use '0' to disable"
301 ." hotplug completely. Using '1' as value is an alias for the default `network,disk,usb`.",
302 default => 'network,disk,usb',
303 },
304 reboot => {
305 optional => 1,
306 type => 'boolean',
307 description => "Allow reboot. If set to '0' the VM exit on reboot.",
308 default => 1,
309 },
310 lock => {
311 optional => 1,
312 type => 'string',
313 description => "Lock/unlock the VM.",
314 enum => [qw(backup clone create migrate rollback snapshot snapshot-delete suspending suspended)],
315 },
316 cpulimit => {
317 optional => 1,
318 type => 'number',
319 description => "Limit of CPU usage.",
320 verbose_description => "Limit of CPU usage.\n\nNOTE: If the computer has 2 CPUs, it has"
321 ." total of '2' CPU time. Value '0' indicates no CPU limit.",
322 minimum => 0,
323 maximum => 128,
324 default => 0,
325 },
326 cpuunits => {
327 optional => 1,
328 type => 'integer',
329 description => "CPU weight for a VM, will be clamped to [1, 10000] in cgroup v2.",
330 verbose_description => "CPU weight for a VM. Argument is used in the kernel fair scheduler."
331 ." The larger the number is, the more CPU time this VM gets. Number is relative to"
332 ." weights of all the other running VMs.",
333 minimum => 1,
334 maximum => 262144,
335 default => 'cgroup v1: 1024, cgroup v2: 100',
336 },
337 memory => {
338 optional => 1,
339 type => 'integer',
340 description => "Amount of RAM for the VM in MB. This is the maximum available memory when"
341 ." you use the balloon device.",
342 minimum => 16,
343 default => 512,
344 },
345 balloon => {
346 optional => 1,
347 type => 'integer',
348 description => "Amount of target RAM for the VM in MB. Using zero disables the ballon driver.",
349 minimum => 0,
350 },
351 shares => {
352 optional => 1,
353 type => 'integer',
354 description => "Amount of memory shares for auto-ballooning. The larger the number is, the"
355 ." more memory this VM gets. Number is relative to weights of all other running VMs."
356 ." Using zero disables auto-ballooning. Auto-ballooning is done by pvestatd.",
357 minimum => 0,
358 maximum => 50000,
359 default => 1000,
360 },
361 keyboard => {
362 optional => 1,
363 type => 'string',
364 description => "Keyboard layout for VNC server. This option is generally not required and"
365 ." is often better handled from within the guest OS.",
366 enum => PVE::Tools::kvmkeymaplist(),
367 default => undef,
368 },
369 name => {
370 optional => 1,
371 type => 'string', format => 'dns-name',
372 description => "Set a name for the VM. Only used on the configuration web interface.",
373 },
374 scsihw => {
375 optional => 1,
376 type => 'string',
377 description => "SCSI controller model",
378 enum => [qw(lsi lsi53c810 virtio-scsi-pci virtio-scsi-single megasas pvscsi)],
379 default => 'lsi',
380 },
381 description => {
382 optional => 1,
383 type => 'string',
384 description => "Description for the VM. Shown in the web-interface VM's summary."
385 ." This is saved as comment inside the configuration file.",
386 maxLength => 1024 * 8,
387 },
388 ostype => {
389 optional => 1,
390 type => 'string',
391 enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 win10 win11 l24 l26 solaris)],
392 description => "Specify guest operating system.",
393 verbose_description => <<EODESC,
394 Specify guest operating system. This is used to enable special
395 optimization/features for specific operating systems:
396
397 [horizontal]
398 other;; unspecified OS
399 wxp;; Microsoft Windows XP
400 w2k;; Microsoft Windows 2000
401 w2k3;; Microsoft Windows 2003
402 w2k8;; Microsoft Windows 2008
403 wvista;; Microsoft Windows Vista
404 win7;; Microsoft Windows 7
405 win8;; Microsoft Windows 8/2012/2012r2
406 win10;; Microsoft Windows 10/2016/2019
407 win11;; Microsoft Windows 11/2022
408 l24;; Linux 2.4 Kernel
409 l26;; Linux 2.6 - 5.X Kernel
410 solaris;; Solaris/OpenSolaris/OpenIndiania kernel
411 EODESC
412 },
413 boot => {
414 optional => 1,
415 type => 'string', format => 'pve-qm-boot',
416 description => "Specify guest boot order. Use the 'order=' sub-property as usage with no"
417 ." key or 'legacy=' is deprecated.",
418 },
419 bootdisk => {
420 optional => 1,
421 type => 'string', format => 'pve-qm-bootdisk',
422 description => "Enable booting from specified disk. Deprecated: Use 'boot: order=foo;bar' instead.",
423 pattern => '(ide|sata|scsi|virtio)\d+',
424 },
425 smp => {
426 optional => 1,
427 type => 'integer',
428 description => "The number of CPUs. Please use option -sockets instead.",
429 minimum => 1,
430 default => 1,
431 },
432 sockets => {
433 optional => 1,
434 type => 'integer',
435 description => "The number of CPU sockets.",
436 minimum => 1,
437 default => 1,
438 },
439 cores => {
440 optional => 1,
441 type => 'integer',
442 description => "The number of cores per socket.",
443 minimum => 1,
444 default => 1,
445 },
446 numa => {
447 optional => 1,
448 type => 'boolean',
449 description => "Enable/disable NUMA.",
450 default => 0,
451 },
452 hugepages => {
453 optional => 1,
454 type => 'string',
455 description => "Enable/disable hugepages memory.",
456 enum => [qw(any 2 1024)],
457 },
458 keephugepages => {
459 optional => 1,
460 type => 'boolean',
461 default => 0,
462 description => "Use together with hugepages. If enabled, hugepages will not not be deleted"
463 ." after VM shutdown and can be used for subsequent starts.",
464 },
465 vcpus => {
466 optional => 1,
467 type => 'integer',
468 description => "Number of hotplugged vcpus.",
469 minimum => 1,
470 default => 0,
471 },
472 acpi => {
473 optional => 1,
474 type => 'boolean',
475 description => "Enable/disable ACPI.",
476 default => 1,
477 },
478 agent => {
479 optional => 1,
480 description => "Enable/disable communication with the Qemu Guest Agent and its properties.",
481 type => 'string',
482 format => $agent_fmt,
483 },
484 kvm => {
485 optional => 1,
486 type => 'boolean',
487 description => "Enable/disable KVM hardware virtualization.",
488 default => 1,
489 },
490 tdf => {
491 optional => 1,
492 type => 'boolean',
493 description => "Enable/disable time drift fix.",
494 default => 0,
495 },
496 localtime => {
497 optional => 1,
498 type => 'boolean',
499 description => "Set the real time clock (RTC) to local time. This is enabled by default if"
500 ." the `ostype` indicates a Microsoft Windows OS.",
501 },
502 freeze => {
503 optional => 1,
504 type => 'boolean',
505 description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
506 },
507 vga => {
508 optional => 1,
509 type => 'string', format => $vga_fmt,
510 description => "Configure the VGA hardware.",
511 verbose_description => "Configure the VGA Hardware. If you want to use high resolution"
512 ." modes (>= 1280x1024x16) you may need to increase the vga memory option. Since QEMU"
513 ." 2.9 the default VGA display type is 'std' for all OS types besides some Windows"
514 ." versions (XP and older) which use 'cirrus'. The 'qxl' option enables the SPICE"
515 ." display server. For win* OS you can select how many independent displays you want,"
516 ." Linux guests can add displays them self.\nYou can also run without any graphic card,"
517 ." using a serial device as terminal.",
518 },
519 watchdog => {
520 optional => 1,
521 type => 'string', format => 'pve-qm-watchdog',
522 description => "Create a virtual hardware watchdog device.",
523 verbose_description => "Create a virtual hardware watchdog device. Once enabled (by a guest"
524 ." action), the watchdog must be periodically polled by an agent inside the guest or"
525 ." else the watchdog will reset the guest (or execute the respective action specified)",
526 },
527 startdate => {
528 optional => 1,
529 type => 'string',
530 typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
531 description => "Set the initial date of the real time clock. Valid format for date are:"
532 ."'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
533 pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
534 default => 'now',
535 },
536 startup => get_standard_option('pve-startup-order'),
537 template => {
538 optional => 1,
539 type => 'boolean',
540 description => "Enable/disable Template.",
541 default => 0,
542 },
543 args => {
544 optional => 1,
545 type => 'string',
546 description => "Arbitrary arguments passed to kvm.",
547 verbose_description => <<EODESCR,
548 Arbitrary arguments passed to kvm, for example:
549
550 args: -no-reboot -no-hpet
551
552 NOTE: this option is for experts only.
553 EODESCR
554 },
555 tablet => {
556 optional => 1,
557 type => 'boolean',
558 default => 1,
559 description => "Enable/disable the USB tablet device.",
560 verbose_description => "Enable/disable the USB tablet device. This device is usually needed"
561 ." to allow absolute mouse positioning with VNC. Else the mouse runs out of sync with"
562 ." normal VNC clients. If you're running lots of console-only guests on one host, you"
563 ." may consider disabling this to save some context switches. This is turned off by"
564 ." default if you use spice (`qm set <vmid> --vga qxl`).",
565 },
566 migrate_speed => {
567 optional => 1,
568 type => 'integer',
569 description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
570 minimum => 0,
571 default => 0,
572 },
573 migrate_downtime => {
574 optional => 1,
575 type => 'number',
576 description => "Set maximum tolerated downtime (in seconds) for migrations.",
577 minimum => 0,
578 default => 0.1,
579 },
580 cdrom => {
581 optional => 1,
582 type => 'string', format => 'pve-qm-ide',
583 typetext => '<volume>',
584 description => "This is an alias for option -ide2",
585 },
586 cpu => {
587 optional => 1,
588 description => "Emulated CPU type.",
589 type => 'string',
590 format => 'pve-vm-cpu-conf',
591 },
592 parent => get_standard_option('pve-snapshot-name', {
593 optional => 1,
594 description => "Parent snapshot name. This is used internally, and should not be modified.",
595 }),
596 snaptime => {
597 optional => 1,
598 description => "Timestamp for snapshots.",
599 type => 'integer',
600 minimum => 0,
601 },
602 vmstate => {
603 optional => 1,
604 type => 'string', format => 'pve-volume-id',
605 description => "Reference to a volume which stores the VM state. This is used internally"
606 ." for snapshots.",
607 },
608 vmstatestorage => get_standard_option('pve-storage-id', {
609 description => "Default storage for VM state volumes/files.",
610 optional => 1,
611 }),
612 runningmachine => get_standard_option('pve-qemu-machine', {
613 description => "Specifies the QEMU machine type of the running vm. This is used internally"
614 ." for snapshots.",
615 }),
616 runningcpu => {
617 description => "Specifies the QEMU '-cpu' parameter of the running vm. This is used"
618 ." internally for snapshots.",
619 optional => 1,
620 type => 'string',
621 pattern => $PVE::QemuServer::CPUConfig::qemu_cmdline_cpu_re,
622 format_description => 'QEMU -cpu parameter'
623 },
624 machine => get_standard_option('pve-qemu-machine'),
625 arch => {
626 description => "Virtual processor architecture. Defaults to the host.",
627 optional => 1,
628 type => 'string',
629 enum => [qw(x86_64 aarch64)],
630 },
631 smbios1 => {
632 description => "Specify SMBIOS type 1 fields.",
633 type => 'string', format => 'pve-qm-smbios1',
634 maxLength => 512,
635 optional => 1,
636 },
637 protection => {
638 optional => 1,
639 type => 'boolean',
640 description => "Sets the protection flag of the VM. This will disable the remove VM and"
641 ." remove disk operations.",
642 default => 0,
643 },
644 bios => {
645 optional => 1,
646 type => 'string',
647 enum => [ qw(seabios ovmf) ],
648 description => "Select BIOS implementation.",
649 default => 'seabios',
650 },
651 vmgenid => {
652 type => 'string',
653 pattern => '(?:[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}|[01])',
654 format_description => 'UUID',
655 description => "Set VM Generation ID. Use '1' to autogenerate on create or update, pass '0'"
656 ." to disable explicitly.",
657 verbose_description => "The VM generation ID (vmgenid) device exposes a 128-bit integer"
658 ." value identifier to the guest OS. This allows to notify the guest operating system"
659 ." when the virtual machine is executed with a different configuration (e.g. snapshot"
660 ." execution or creation from a template). The guest operating system notices the"
661 ." change, and is then able to react as appropriate by marking its copies of"
662 ." distributed databases as dirty, re-initializing its random number generator, etc.\n"
663 ."Note that auto-creation only works when done through API/CLI create or update methods"
664 .", but not when manually editing the config file.",
665 default => "1 (autogenerated)",
666 optional => 1,
667 },
668 hookscript => {
669 type => 'string',
670 format => 'pve-volume-id',
671 optional => 1,
672 description => "Script that will be executed during various steps in the vms lifetime.",
673 },
674 ivshmem => {
675 type => 'string',
676 format => $ivshmem_fmt,
677 description => "Inter-VM shared memory. Useful for direct communication between VMs, or to"
678 ." the host.",
679 optional => 1,
680 },
681 audio0 => {
682 type => 'string',
683 format => $audio_fmt,
684 description => "Configure a audio device, useful in combination with QXL/Spice.",
685 optional => 1
686 },
687 spice_enhancements => {
688 type => 'string',
689 format => $spice_enhancements_fmt,
690 description => "Configure additional enhancements for SPICE.",
691 optional => 1
692 },
693 tags => {
694 type => 'string', format => 'pve-tag-list',
695 description => 'Tags of the VM. This is only meta information.',
696 optional => 1,
697 },
698 rng0 => {
699 type => 'string',
700 format => $rng_fmt,
701 description => "Configure a VirtIO-based Random Number Generator.",
702 optional => 1,
703 },
704 meta => {
705 type => 'string',
706 format => $meta_info_fmt,
707 description => "Some (read-only) meta-information about this guest.",
708 optional => 1,
709 },
710 };
711
# Property-string schema registered as format 'pve-qm-cicustom'. Every key is optional and
# names a volume ('pve-volume-id') holding a custom cloud-init file of the respective kind.
my $cicustom_fmt = {
    meta => {
        type => 'string',
        optional => 1,
        # Both parts must be single-quoted: a double quote inside the single-quoted literal
        # would end up verbatim (together with the line break) in the description text.
        description => 'Specify a custom file containing all meta data passed to the VM via'
            .' cloud-init. This is provider specific meaning configdrive2 and nocloud differ.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    network => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all network data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    user => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all user data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    vendor => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all vendor data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
};
PVE::JSONSchema::register_format('pve-qm-cicustom', $cicustom_fmt);
747
# Option schemas for the cloud-init related VM settings. All entries are optional strings;
# they are merged into the main $confdesc further down in this file.
my $confdesc_cloudinit = {
    citype => {
        optional => 1,
        type => 'string',
        description => 'Specifies the cloud-init configuration format. The default depends on the'
            .' configured operating system type (`ostype`). We use the `nocloud` format for Linux,'
            .' and `configdrive2` for windows.',
        enum => ['configdrive2', 'nocloud', 'opennebula'],
    },
    ciuser => {
        optional => 1,
        type => 'string',
        description => "cloud-init: User name to change ssh keys and password for instead of the"
            ." image's configured default user.",
    },
    cipassword => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Password to assign the user. Using this is generally not'
            .' recommended. Use ssh keys instead. Also note that older cloud-init versions do not'
            .' support hashed passwords.',
    },
    cicustom => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Specify custom files to replace the automatically generated'
            .' ones at start.',
        format => 'pve-qm-cicustom',
    },
    searchdomain => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Sets DNS search domains for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    nameserver => {
        optional => 1,
        type => 'string', format => 'address-list',
        description => 'cloud-init: Sets DNS server IP address for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    sshkeys => {
        optional => 1,
        type => 'string',
        format => 'urlencoded',
        description => "cloud-init: Setup public SSH keys (one key per line, OpenSSH format).",
    },
};
798
799 # what about other qemu settings ?
800 #cpu => 'string',
801 #machine => 'string',
802 #fda => 'file',
803 #fdb => 'file',
804 #mtdblock => 'file',
805 #sd => 'file',
806 #pflash => 'file',
807 #snapshot => 'bool',
808 #bootp => 'file',
809 ##tftp => 'dir',
810 ##smb => 'dir',
811 #kernel => 'file',
812 #append => 'string',
813 #initrd => 'file',
814 ##soundhw => 'string',
815
# Expose every option schema present in $confdesc at this point as a standard option named
# "pve-qm-<key>". Entries added to $confdesc further down are not covered by this loop.
for my $opt (keys %$confdesc) {
    PVE::JSONSchema::register_standard_option("pve-qm-$opt", $confdesc->{$opt});
}
819
820 my $MAX_USB_DEVICES = 5;
821 my $MAX_NETS = 32;
822 my $MAX_SERIAL_PORTS = 4;
823 my $MAX_PARALLEL_PORTS = 3;
824 my $MAX_NUMA = 8;
825
826 my $numa_fmt = {
827 cpus => {
828 type => "string",
829 pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
830 description => "CPUs accessing this NUMA node.",
831 format_description => "id[-id];...",
832 },
833 memory => {
834 type => "number",
835 description => "Amount of memory this NUMA node provides.",
836 optional => 1,
837 },
838 hostnodes => {
839 type => "string",
840 pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
841 description => "Host NUMA nodes to use.",
842 format_description => "id[-id];...",
843 optional => 1,
844 },
845 policy => {
846 type => 'string',
847 enum => [qw(preferred bind interleave)],
848 description => "NUMA allocation policy.",
849 optional => 1,
850 },
851 };
852 PVE::JSONSchema::register_format('pve-qm-numanode', $numa_fmt);
853 my $numadesc = {
854 optional => 1,
855 type => 'string', format => $numa_fmt,
856 description => "NUMA topology.",
857 };
858 PVE::JSONSchema::register_standard_option("pve-qm-numanode", $numadesc);
859
# Add one 'numa<N>' option per index below $MAX_NUMA; all of them share the same schema hash.
for my $idx (0 .. $MAX_NUMA - 1) {
    $confdesc->{"numa$idx"} = $numadesc;
}
863
864 my $nic_model_list = [
865 'e1000',
866 'e1000-82540em',
867 'e1000-82544gc',
868 'e1000-82545em',
869 'e1000e',
870 'i82551',
871 'i82557b',
872 'i82559er',
873 'ne2k_isa',
874 'ne2k_pci',
875 'pcnet',
876 'rtl8139',
877 'virtio',
878 'vmxnet3',
879 ];
880 my $nic_model_list_txt = join(' ', sort @$nic_model_list);
881
882 my $net_fmt_bridge_descr = <<__EOD__;
883 Bridge to attach the network device to. The Proxmox VE standard bridge
884 is called 'vmbr0'.
885
886 If you do not specify a bridge, we create a kvm user (NATed) network
887 device, which provides DHCP and DNS services. The following addresses
888 are used:
889
890 10.0.2.2 Gateway
891 10.0.2.3 DNS Server
892 10.0.2.4 SMB Server
893
894 The DHCP server assign addresses to the guest starting from 10.0.2.15.
895 __EOD__
896
# Property-string schema describing a single network device; used as the format of $netdesc,
# which in turn backs the 'net<N>' options.
my $net_fmt = {
    macaddr => get_standard_option('mac-addr', {
        description => "MAC address. That address must be unique within your network. This is"
            ." automatically generated if not specified.",
    }),
    model => {
        type => 'string',
        description => "Network Card Model. The 'virtio' model provides the best performance with"
            ." very low CPU overhead. If your guest does not support this driver, it is usually"
            ." best to use 'e1000'.",
        enum => $nic_model_list,
        default_key => 1,
    },
    # one entry per NIC model name, each marked as key alias of 'model' with alias 'macaddr'
    (map { $_ => { keyAlias => 'model', alias => 'macaddr' }} @$nic_model_list),
    bridge => get_standard_option('pve-bridge-id', {
        description => $net_fmt_bridge_descr,
        optional => 1,
    }),
    queues => {
        type => 'integer',
        minimum => 0, maximum => 16,
        description => 'Number of packet queues to be used on the device.',
        optional => 1,
    },
    rate => {
        type => 'number',
        minimum => 0,
        description => "Rate limit in mbps (megabytes per second) as floating point number.",
        optional => 1,
    },
    tag => {
        type => 'integer',
        minimum => 1, maximum => 4094,
        description => 'VLAN tag to apply to packets on this interface.',
        optional => 1,
    },
    trunks => {
        type => 'string',
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        description => 'VLAN trunks to pass through this interface.',
        format_description => 'vlanid[;vlanid...]',
        optional => 1,
    },
    firewall => {
        type => 'boolean',
        description => 'Whether this interface should be protected by the firewall.',
        optional => 1,
    },
    link_down => {
        type => 'boolean',
        description => 'Whether this interface should be disconnected (like pulling the plug).',
        optional => 1,
    },
    mtu => {
        type => 'integer',
        minimum => 1, maximum => 65520,
        description => "Force MTU, for VirtIO only. Set to '1' to use the bridge MTU",
        optional => 1,
    },
};
957
958 my $netdesc = {
959 optional => 1,
960 type => 'string', format => $net_fmt,
961 description => "Specify network devices.",
962 };
963
964 PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
965
966 my $ipconfig_fmt = {
967 ip => {
968 type => 'string',
969 format => 'pve-ipv4-config',
970 format_description => 'IPv4Format/CIDR',
971 description => 'IPv4 address in CIDR format.',
972 optional => 1,
973 default => 'dhcp',
974 },
975 gw => {
976 type => 'string',
977 format => 'ipv4',
978 format_description => 'GatewayIPv4',
979 description => 'Default gateway for IPv4 traffic.',
980 optional => 1,
981 requires => 'ip',
982 },
983 ip6 => {
984 type => 'string',
985 format => 'pve-ipv6-config',
986 format_description => 'IPv6Format/CIDR',
987 description => 'IPv6 address in CIDR format.',
988 optional => 1,
989 default => 'dhcp',
990 },
991 gw6 => {
992 type => 'string',
993 format => 'ipv6',
994 format_description => 'GatewayIPv6',
995 description => 'Default gateway for IPv6 traffic.',
996 optional => 1,
997 requires => 'ip6',
998 },
999 };
1000 PVE::JSONSchema::register_format('pve-qm-ipconfig', $ipconfig_fmt);
# Schema of the per-interface cloud-init 'ipconfig<N>' options; the value is a property string
# in the 'pve-qm-ipconfig' format.
my $ipconfigdesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-ipconfig',
    description => <<'EODESCR',
cloud-init: Specify IP addresses and gateways for the corresponding interface.

IP addresses use CIDR notation, gateways are optional but need an IP of the same type specified.

The special string 'dhcp' can be used for IP addresses to use DHCP, in which case no explicit
gateway should be provided.
For IPv6 the special string 'auto' can be used to use stateless autoconfiguration. This requires
cloud-init 19.4 or newer.

If cloud-init is enabled and neither an IPv4 nor an IPv6 address is specified, it defaults to using
dhcp on IPv4.
EODESCR
};
# Register the ipconfig schema itself under its standard-option name; the network device
# schema is registered separately as "pve-qm-net".
PVE::JSONSchema::register_standard_option("pve-qm-ipconfig", $ipconfigdesc);
1019
# One 'net<N>' option and one cloud-init 'ipconfig<N>' option per possible NIC slot.
for my $i (0 .. $MAX_NETS - 1) {
    $confdesc->{"net$i"} = $netdesc;
    $confdesc_cloudinit->{"ipconfig$i"} = $ipconfigdesc;
}

# Every cloud-init option is copied into the general VM option schema as well.
$confdesc->{$_} = $confdesc_cloudinit->{$_} for keys %$confdesc_cloudinit;
1028
PVE::JSONSchema::register_format('pve-volume-id-or-qm-path', \&verify_volume_id_or_qm_path);

# Format verifier: the special values 'none' and 'cdrom' are accepted as they
# are, everything else is delegated to verify_volume_id_or_absolute_path().
sub verify_volume_id_or_qm_path {
    my ($volid, $noerr) = @_;

    for my $special (qw(none cdrom)) {
        return $volid if $volid eq $special;
    }

    return verify_volume_id_or_absolute_path($volid, $noerr);
}
1037
PVE::JSONSchema::register_format('pve-volume-id-or-absolute-path', \&verify_volume_id_or_absolute_path);

# Format verifier: values starting with a slash are accepted unchecked, all
# other values have to pass the 'pve-volume-id' format check.
# Returns the (checked) value; on failure dies, or returns nothing if $noerr is set.
sub verify_volume_id_or_absolute_path {
    my ($volid, $noerr) = @_;

    return $volid if $volid =~ m|^/|;

    my $checked = eval { PVE::JSONSchema::check_format('pve-volume-id', $volid, '') };
    if (my $err = $@) {
        die $err if !$noerr;
        return;
    }

    return $checked;
}
1051
# Property-string schema for the 'usb<N>' options ('host' is the default key).
my $usb_fmt = {
    host => {
        default_key => 1,
        type => 'string', format => 'pve-qm-usb-device',
        format_description => 'HOSTUSBDEVICE|spice',
        description => <<EODESCR,
The Host USB device or port or the value 'spice'. HOSTUSBDEVICE syntax is:

 'bus-port(.port)*' (decimal numbers) or
 'vendor_id:product_id' (hexadecimal numbers) or
 'spice'

You can use the 'lsusb -t' command to list existing usb devices.

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

The value 'spice' can be used to add a usb redirection device for spice.
EODESCR
    },
    usb3 => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether the given host option is a USB3 device or port.",
        default => 0,
    },
};

# Schema entry shared by all 'usb<N>' options.
my $usbdesc = {
    optional => 1,
    type => 'string', format => $usb_fmt,
    description => "Configure an USB device (n is 0 to 4).",
};
PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc);
1086
# Schema entry shared by all 'serial<N>' options: either a host device path
# below /dev/ or the literal 'socket'.
my $serialdesc = {
    type => 'string',
    pattern => '(/dev/.+|socket)',
    optional => 1,
    description => "Create a serial device inside the VM (n is 0 to 3)",
    verbose_description => <<'EODESCR',
Create a serial device inside the VM (n is 0 to 3), and pass through a
host serial device (i.e. /dev/ttyS0), or create a unix socket on the
host side (use 'qm terminal' to open a terminal connection).

NOTE: If you pass through a host serial device, it is no longer possible to migrate such machines -
use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1103
# Schema entry shared by all 'parallel<N>' options: a host parallel port
# device node (/dev/parportN or /dev/usb/lpN).
my $paralleldesc = {
    type => 'string',
    pattern => '/dev/parport\d+|/dev/usb/lp\d+',
    optional => 1,
    description => "Map host parallel devices (n is 0 to 2).",
    verbose_description => <<'EODESCR',
Map host parallel devices (n is 0 to 2).

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1118
# Register the per-index device options in the VM option schema.
$confdesc->{"parallel$_"} = $paralleldesc for 0 .. $MAX_PARALLEL_PORTS - 1;

$confdesc->{"serial$_"} = $serialdesc for 0 .. $MAX_SERIAL_PORTS - 1;

$confdesc->{"hostpci$_"} = $PVE::QemuServer::PCI::hostpcidesc
    for 0 .. $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES - 1;

# drive options come pre-keyed from the Drive module
$confdesc->{$_} = $PVE::QemuServer::Drive::drivedesc_hash->{$_}
    for keys %{$PVE::QemuServer::Drive::drivedesc_hash};

$confdesc->{"usb$_"} = $usbdesc for 0 .. $MAX_USB_DEVICES - 1;
1138
# Property-string schema for the 'boot' option: the deprecated 'legacy' letter
# list (default key) and the explicit device 'order'.
my $boot_fmt = {
    legacy => {
        type => 'string',
        pattern => '[acdn]{1,4}',
        format_description => "[acdn]{1,4}",
        description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n)."
            . " Deprecated, use 'order=' instead.",
        default_key => 1,
        optional => 1,

        # note: this is also the fallback if boot: is not given at all
        default => 'cdn',
    },
    order => {
        type => 'string',
        format => 'pve-qm-bootdev-list',
        format_description => "device[;device...]",
        optional => 1,
        description => <<'EODESC',
The guest will attempt to boot from devices in the order they appear here.

Disks, optical drives and passed-through storage USB devices will be directly
booted from, NICs will load PXE, and PCIe devices will either behave like disks
(e.g. NVMe) or load an option ROM (e.g. RAID controller, hardware NIC).

Note that only devices in this list will be marked as bootable and thus loaded
by the guest firmware (BIOS/UEFI). If you require multiple disks for booting
(e.g. software-raid), you need to specify all of them here.

Overrides the deprecated 'legacy=[acdn]*' value when given.
EODESC
    },
};
PVE::JSONSchema::register_format('pve-qm-boot', $boot_fmt);
1173
PVE::JSONSchema::register_format('pve-qm-bootdev', \&verify_bootdev);

# Format verifier for a single boot device name. Accepted are valid drive names
# (except efidisk*/tpmstate*) and net<N>/usb<N>/hostpci<N> keys that exist in
# the option schema. Dies on anything else, or returns nothing if $noerr is set.
sub verify_bootdev {
    my ($dev, $noerr) = @_;

    my $is_special = $dev =~ m/^efidisk/ || $dev =~ m/^tpmstate/;
    if (PVE::QemuServer::Drive::is_valid_drivename($dev) && !$is_special) {
        return $dev;
    }

    for my $base (qw(net usb hostpci)) {
        next if $dev !~ m/^$base\d+$/;
        next if !$confdesc->{$dev};
        return $dev;
    }

    die "invalid boot device '$dev'\n" if !$noerr;
    return;
}
1195
# Render a list of boot devices as 'boot' property string ("order=a;b;c").
# An empty device list yields the empty string.
sub print_bootorder {
    my ($devs) = @_;

    return "" unless scalar(@$devs);

    my $order = join(';', @$devs);
    return PVE::JSONSchema::print_property_string({ order => $order }, $boot_fmt);
}
1202
# cached result of the KVM_GET_API_VERSION ioctl (false until queried successfully)
my $kvm_api_version = 0;

# Return the value of the KVM_GET_API_VERSION ioctl on /dev/kvm. A true result
# is cached; returns nothing if /dev/kvm cannot be opened.
sub kvm_version {
    if (!$kvm_api_version) {
        open(my $kvm_fh, '<', '/dev/kvm') or return;

        # 0xae00 => KVM_GET_API_VERSION
        $kvm_api_version = ioctl($kvm_fh, 0xae00, 0);
        close($kvm_fh);
    }

    return $kvm_api_version;
}
1216
# per-binary caches: parsed version string and the binary's mtime at parse time
my $kvm_user_version = {};
my $kvm_mtime = {};

# Return the version ("X.Y" or "X.Y.Z") reported by `$binary --version`, or
# 'unknown' if the output could not be parsed.
#
# $binary defaults to the command for the host architecture. The result is
# cached per binary and re-read whenever the binary's mtime changes.
# Dies with a descriptive message if the binary cannot be stat()ed.
sub kvm_user_version {
    my ($binary) = @_;

    $binary //= get_command_for_arch(get_host_arch()); # get the native arch by default

    # stat() returns undef on failure; check it here instead of failing later
    # with an opaque "Can't call method mtime on an undefined value".
    my $st = stat($binary)
        or die "unable to stat QEMU binary '$binary' - $!\n";
    my $mtime = $st->mtime;

    my $cachedmtime = $kvm_mtime->{$binary} // -1;
    return $kvm_user_version->{$binary}
        if $kvm_user_version->{$binary} && $cachedmtime == $mtime;

    # pessimistic default, replaced below if the version line can be parsed
    $kvm_user_version->{$binary} = 'unknown';
    $kvm_mtime->{$binary} = $mtime;

    my $code = sub {
        my $line = shift;
        if ($line =~ m/^QEMU( PC)? emulator version (\d+\.\d+(\.\d+)?)(\.\d+)?[,\s]/) {
            $kvm_user_version->{$binary} = $2;
        }
    };

    # best effort: a failing command only produces a warning
    eval { run_command([$binary, '--version'], outfunc => $code); };
    warn $@ if $@;

    return $kvm_user_version->{$binary};
}
# Thin wrapper around PVE::QemuServer::Machine::extract_version() which falls
# back to the version of the installed binary when no $version is passed.
my sub extract_version {
    my ($machine_type, $version) = @_;

    $version //= kvm_user_version();

    return PVE::QemuServer::Machine::extract_version($machine_type, $version);
}
1251
# True if the character device /dev/vhost-net exists.
sub kernel_has_vhost_net {
    my $vhost_dev = '/dev/vhost-net';
    return -c $vhost_dev;
}
1255
# True if $key is a known VM config option (has an entry in $confdesc).
sub option_exists {
    my ($key) = @_;

    return defined($confdesc->{$key});
}
1260
# cached result of get_cdrom_path()
my $cdrom_path;

# Return the first of /dev/cdrom, /dev/cdrom1, /dev/cdrom2 that is a symlink.
# A hit is cached for subsequent calls. Returns undef if none of them exists.
sub get_cdrom_path {
    return $cdrom_path if $cdrom_path;

    for my $candidate (qw(/dev/cdrom /dev/cdrom1 /dev/cdrom2)) {
        return $cdrom_path = $candidate if -l $candidate;
    }

    # explicit result instead of leaking the value of the last file test
    return undef;
}
1270
# Map the file value of a CD-ROM drive to a path:
#   'cdrom'        -> the host's CD-ROM device (see get_cdrom_path)
#   'none'         -> empty string
#   absolute path  -> returned unchanged
#   anything else  -> resolved via PVE::Storage::path()
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';
    return '' if $cdrom eq 'none';
    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path($storecfg, $cdrom);
}
1284
# try to convert old style file names to volume IDs
#
# 'none', 'cdrom', /dev/ paths and values that already look like
# "storage:volume" are returned unchanged. A plain file name is mapped to the
# 'local' storage ("local:iso/<file>" for cdrom media, "local:<vmid>/<file>"
# otherwise). Any other value containing a slash yields nothing.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    my $keep_as_is = $file eq 'none'
        || $file eq 'cdrom'
        || $file =~ m|^/dev/.+|
        || $file =~ m/^([^:]+):(.+)$/;
    return $file if $keep_as_is;

    return if $file =~ m|/|;

    my $is_cdrom = $media && $media eq 'cdrom';
    return $is_cdrom ? "local:iso/$file" : "local:$vmid/$file";
}
1303
# Check that the content type $vtype fits the drive's $media setting
# ('disk' expects 'images', 'cdrom' expects 'iso'). Nothing is checked when
# $media is not set. Raises a parameter exception for $opt on a mismatch.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my %expected_type_for = (
        disk => 'images',
        cdrom => 'iso',
    );
    my $etype = $expected_type_for{$media} // die "internal error";

    return if $vtype eq $etype;

    raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
}
1322
# Normalize $drive->{file} in place: a value that is neither 'cdrom'/'none',
# a /dev/ path, a "storage:volume" ID nor a plain number is treated as a
# filesystem path and converted to a volume ID (raising a parameter exception
# for $opt if no storage matches). Also defaults the media type to 'cdrom'
# for ISO volumes and for the special values 'cdrom'/'none'.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    # try to convert filesystem paths to volume IDs

    my $is_fs_path = $drive->{file} !~ m/^(cdrom|none)$/
        && $drive->{file} !~ m|^/dev/.+|
        && $drive->{file} !~ m/^([^:]+):(.+)$/
        && $drive->{file} !~ m/^\d+$/;

    if ($is_fs_path) {
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $drive->{file});
        if (!$vtype) {
            raise_param_exc({ $opt => "unable to associate path '$drive->{file}' to any storage"});
        }
        $drive->{media} = 'cdrom' if !$drive->{media} && $vtype eq 'iso';
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    $drive->{media} = 'cdrom' if !$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/;
}
1342
# Parse the 'hotplug' option value into a hash { feature => 1, ... }.
# '0' selects no feature, '1' is replaced by the schema default list.
# Dies on any feature other than network, disk, cpu, memory or usb.
sub parse_hotplug_features {
    my ($data) = @_;

    return {} if $data eq '0';

    my $list = $data eq '1' ? $confdesc->{hotplug}->{default} : $data;

    my $enabled = {};
    for my $feature (PVE::Tools::split_list($list)) {
        my ($name) = $feature =~ m/^(network|disk|cpu|memory|usb)$/
            or die "invalid hotplug feature '$feature'\n";
        $enabled->{$name} = 1;
    }

    return $enabled;
}
1361
PVE::JSONSchema::register_format('pve-hotplug-features', \&pve_verify_hotplug_features);

# Format verifier for the 'hotplug' option.
#
# Returns $value if it can be parsed by parse_hotplug_features(). Otherwise
# dies with the parser's error message, or returns nothing if $noerr is set.
sub pve_verify_hotplug_features {
    my ($value, $noerr) = @_;

    # parse_hotplug_features() dies on invalid input, so it has to be wrapped
    # in an eval for $noerr to have any effect.
    my $features = eval { parse_hotplug_features($value) };
    my $err = $@;

    return $value if $features;

    return if $noerr;

    die $err if $err;
    die "unable to parse hotplug option\n";
}
1372
# Send a SCSI INQUIRY to the device behind file handle $fh using the SCSI
# generic (sg) ioctl interface.
#
# Returns a hash reference with the keys type, removable, vendor, product and
# revision. On failure the function dies, or returns nothing if $noerr is set.
sub scsi_inquiry {
    my($fh, $noerr) = @_;

    my $SG_IO = 0x2285;
    my $SG_GET_VERSION_NUM = 0x2282;

    my $versionbuf = "\x00" x 8;
    my $ret = ioctl($fh, $SG_GET_VERSION_NUM, $versionbuf);
    if (!$ret) {
        die "scsi ioctl SG_GET_VERSION_NUM failed - $!\n" if !$noerr;
        return;
    }
    my $version = unpack("I", $versionbuf);
    if ($version < 30000) {
        die "scsi generic interface too old\n" if !$noerr;
        return;
    }

    my $buf = "\x00" x 36; # receives the 36 byte inquiry response
    my $sensebuf = "\x00" x 8;
    my $cmd = pack("C x3 C x1", 0x12, 36); # opcode 0x12, allocation length 36

    # see /usr/include/scsi/sg.h
    my $sg_io_hdr_t = "i i C C s I P P P I I i P C C C C S S i I I";

    my $packet = pack(
        $sg_io_hdr_t, ord('S'), -3, length($cmd), length($sensebuf), 0, length($buf), $buf, $cmd, $sensebuf, 6000
    );

    $ret = ioctl($fh, $SG_IO, $packet);
    if (!$ret) {
        die "scsi ioctl SG_IO failed - $!\n" if !$noerr;
        return;
    }

    # NOTE(review): going by the template order, fields 17 and 18 should be
    # host_status and driver_status of sg_io_hdr - confirm against sg.h
    my @res = unpack($sg_io_hdr_t, $packet);
    if ($res[17] || $res[18]) {
        die "scsi ioctl SG_IO status error - $!\n" if !$noerr;
        return;
    }

    my $res = {};
    $res->@{qw(type removable vendor product revision)} = unpack("C C x6 A8 A16 A4", $buf);

    # reduce both raw bytes to the bits of interest
    $res->{removable} = $res->{removable} & 128 ? 1 : 0;
    $res->{type} &= 0x1F;

    return $res;
}
1422
# Open $path read-write and run a SCSI inquiry on it.
#
# Returns the hash reference produced by scsi_inquiry(), or nothing if the
# path cannot be opened or the inquiry fails.
sub path_is_scsi {
    my ($path) = @_;

    # Pass the mode separately so that the path is never parsed for mode
    # characters (as the two-argument form "+<$path" would do).
    my $fh = IO::File->new($path, '+<') || return;
    my $res = scsi_inquiry($fh, 1);
    close($fh);

    return $res;
}
1432
# Build the device string for the USB tablet. The tablet is attached to the
# 'ehci' bus on q35 machines and on aarch64, and to 'uhci' otherwise.
sub print_tabletdevice_full {
    my ($conf, $arch) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # we use uhci for old VMs because tablet driver was buggy in older qemu
    my $usbbus = ($q35 || $arch eq 'aarch64') ? 'ehci' : 'uhci';

    return "usb-tablet,id=tablet,bus=$usbbus.0,port=1";
}
1448
# Build the device string for the USB keyboard, which is only generated for
# the aarch64 architecture; returns nothing for any other $arch.
sub print_keyboarddevice_full {
    my ($conf, $arch) = @_;

    if ($arch eq 'aarch64') {
        return "usb-kbd,id=keyboard,bus=ehci.0,port=2";
    }

    return;
}
1456
# Config key of a drive: interface name followed by the index (e.g. "scsi0").
my sub get_drive_id {
    my ($drive) = @_;

    return $drive->{interface} . $drive->{index};
}
1461
# Build the device string for the guest-visible side of a drive.
#
# Handles the 'virtio', 'scsi', 'ide' and 'sata' interfaces and dies for
# anything else ('usb' is not implemented). The string references the backend
# as "drive-<id>" and gets "bootindex" and "serial" appended when configured.
sub print_drivedevice_full {
    my ($storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type) = @_;

    my $device = '';

    my $drive_id = get_drive_id($drive);
    if ($drive->{interface} eq 'virtio') {
        my $pciaddr = print_pci_addr("$drive_id", $bridges, $arch, $machine_type);
        $device = "virtio-blk-pci,drive=drive-$drive_id,id=${drive_id}${pciaddr}";
        $device .= ",iothread=iothread-$drive_id" if $drive->{iothread};
    } elsif ($drive->{interface} eq 'scsi') {

        my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = 'hd';
        my $path = '';
        if (drive_is_cdrom($drive)) {
            $devicetype = 'cd';
        } else {
            if ($drive->{file} =~ m|^/|) {
                # host path: ask the device itself what kind of SCSI device it is
                $path = $drive->{file};
                if (my $info = path_is_scsi($path)) {
                    if ($info->{type} == 0 && $drive->{scsiblock}) {
                        $devicetype = 'block';
                    } elsif ($info->{type} == 1) { # tape
                        $devicetype = 'generic';
                    }
                }
            } else {
                $path = PVE::Storage::path($storecfg, $drive->{file});
            }

            # for compatibility only, we prefer scsi-hd (#2408, #2355, #2380)
            my $version = extract_version($machine_type, kvm_user_version());
            if ($path =~ m/^iscsi\:\/\// &&
               !min_version($version, 4, 1)) {
                $devicetype = 'generic';
            }
        }

        # lsi* and pvscsi controllers (and the unset default) address drives
        # by scsi-id, all other controllers by lun
        if (!$conf->{scsihw} || $conf->{scsihw} =~ m/^lsi/ || $conf->{scsihw} eq 'pvscsi') {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,scsi-id=$unit";
        } else {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,channel=0,scsi-id=0"
                .",lun=$drive->{index}";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($drive->{ssd} && ($devicetype eq 'block' || $devicetype eq 'hd')) {
            $device .= ",rotation_rate=1";
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};

    } elsif ($drive->{interface} eq 'ide' || $drive->{interface} eq 'sata') {
        my $maxdev = ($drive->{interface} eq 'sata') ? $PVE::QemuServer::Drive::MAX_SATA_DISKS : 2;
        my $controller = int($drive->{index} / $maxdev);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = ($drive->{media} && $drive->{media} eq 'cdrom') ? "cd" : "hd";

        $device = "ide-$devicetype";
        if ($drive->{interface} eq 'ide') {
            $device .= ",bus=ide.$controller,unit=$unit";
        } else {
            $device .= ",bus=ahci$controller.$unit";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($devicetype eq 'hd') {
            if (my $model = $drive->{model}) {
                # the configured value is URI-escaped
                $model = URI::Escape::uri_unescape($model);
                $device .= ",model=$model";
            }
            if ($drive->{ssd}) {
                $device .= ",rotation_rate=1";
            }
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};
    } elsif ($drive->{interface} eq 'usb') {
        die "implement me";
        # -device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0
    } else {
        die "unsupported interface type";
    }

    $device .= ",bootindex=$drive->{bootindex}" if $drive->{bootindex};

    if (my $serial = $drive->{serial}) {
        # the configured value is URI-escaped
        $serial = URI::Escape::uri_unescape($serial);
        $device .= ",serial=$serial";
    }

    return $device;
}
1557
# Read the first 'InitiatorName=' value from /etc/iscsi/initiatorname.iscsi.
# Returns nothing if the file cannot be opened, and undef if it contains no
# matching line.
sub get_initiator_name {
    my $fh = IO::File->new('/etc/iscsi/initiatorname.iscsi') || return;

    my $initiator;
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\s*InitiatorName\s*=\s*([\.\-:\w]+)/) {
            $initiator = $1;
            last;
        }
    }
    $fh->close();

    return $initiator;
}
1571
# Build the backend option string ("file=...,if=none,id=drive-<id>,...") for
# a drive.
#
# $pbs_name, if set, is the node name of a Proxmox Backup Server block device
# that is used as backing for the drive (not allowed for CD-ROM drives).
# $io_uring tells whether io_uring may be chosen as default AIO mode.
sub print_drive_commandline_full {
    my ($storecfg, $vmid, $drive, $pbs_name, $io_uring) = @_;

    my $path;
    my $volid = $drive->{file};
    my $format = $drive->{format};
    my $drive_id = get_drive_id($drive);

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
    my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;

    if (drive_is_cdrom($drive)) {
        $path = get_iso_path($storecfg, $vmid, $volid);
        die "$drive_id: cannot back cdrom drive with PBS snapshot\n" if $pbs_name;
    } else {
        if ($storeid) {
            $path = PVE::Storage::path($storecfg, $volid);
            $format //= qemu_img_format($scfg, $volname);
        } else {
            $path = $volid;
            $format //= "raw";
        }
    }

    # $path may be undefined for a CD-ROM drive without a usable host device
    my $is_rbd = defined($path) && $path =~ m/^rbd:/;

    my $opts = '';
    my @qemu_drive_options = qw(heads secs cyls trans media cache rerror werror aio discard);
    foreach my $o (@qemu_drive_options) {
        $opts .= ",$o=$drive->{$o}" if defined($drive->{$o});
    }

    # snapshot only accepts on|off
    if (defined($drive->{snapshot})) {
        my $v = $drive->{snapshot} ? 'on' : 'off';
        $opts .= ",snapshot=$v";
    }

    if (defined($drive->{ro})) { # ro maps to QEMUs `readonly`, which accepts `on` or `off` only
        $opts .= ",readonly=" . ($drive->{ro} ? 'on' : 'off');
    }

    # bandwidth (given in MiB/s, passed on in bytes/s) and IOPS limits for
    # total, read and write
    foreach my $type (['', '-total'], [_rd => '-read'], [_wr => '-write']) {
        my ($dir, $qmpname) = @$type;
        if (my $v = $drive->{"mbps$dir"}) {
            $opts .= ",throttling.bps$qmpname=".int($v*1024*1024);
        }
        if (my $v = $drive->{"mbps${dir}_max"}) {
            $opts .= ",throttling.bps$qmpname-max=".int($v*1024*1024);
        }
        if (my $v = $drive->{"bps${dir}_max_length"}) {
            $opts .= ",throttling.bps$qmpname-max-length=$v";
        }
        if (my $v = $drive->{"iops${dir}"}) {
            $opts .= ",throttling.iops$qmpname=$v";
        }
        if (my $v = $drive->{"iops${dir}_max"}) {
            $opts .= ",throttling.iops$qmpname-max=$v";
        }
        if (my $v = $drive->{"iops${dir}_max_length"}) {
            $opts .= ",throttling.iops$qmpname-max-length=$v";
        }
    }

    if ($pbs_name) {
        $format = "rbd" if $is_rbd;
        die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n"
            if !$format;
        $opts .= ",format=alloc-track,file.driver=$format";
    } elsif ($format) {
        $opts .= ",format=$format";
    }

    my $cache_direct = 0;

    if (my $cache = $drive->{cache}) {
        $cache_direct = $cache =~ /^(?:off|none|directsync)$/;
    } elsif (!drive_is_cdrom($drive) && !($scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow})) {
        $opts .= ",cache=none";
        $cache_direct = 1;
    }

    # io_uring with cache mode writeback or writethrough on krbd will hang...
    my $rbd_no_io_uring = $scfg && $scfg->{type} eq 'rbd' && $scfg->{krbd} && !$cache_direct;

    # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
    # sometimes, just plain disable...
    my $lvm_no_io_uring = $scfg && $scfg->{type} eq 'lvm';

    # io_uring causes problems when used with CIFS since kernel 5.15
    # Some discussion: https://www.spinics.net/lists/linux-cifs/msg26734.html
    my $cifs_no_io_uring = $scfg && $scfg->{type} eq 'cifs';

    # only pick a default AIO mode if none is configured explicitly
    if (!$drive->{aio}) {
        if ($io_uring && !$rbd_no_io_uring && !$lvm_no_io_uring && !$cifs_no_io_uring) {
            # io_uring supports all cache modes
            $opts .= ",aio=io_uring";
        } else {
            # aio native works only with O_DIRECT
            if($cache_direct) {
                $opts .= ",aio=native";
            } else {
                $opts .= ",aio=threads";
            }
        }
    }

    if (!drive_is_cdrom($drive)) {
        my $detectzeroes;
        if (defined($drive->{detect_zeroes}) && !$drive->{detect_zeroes}) {
            $detectzeroes = 'off';
        } elsif ($drive->{discard}) {
            $detectzeroes = $drive->{discard} eq 'on' ? 'unmap' : 'on';
        } else {
            # This used to be our default with discard not being specified:
            $detectzeroes = 'on';
        }

        # note: 'detect-zeroes' works per blockdev and we want it to persist
        # after the alloc-track is removed, so put it on 'file' directly
        my $dz_param = $pbs_name ? "file.detect-zeroes" : "detect-zeroes";
        $opts .= ",$dz_param=$detectzeroes" if $detectzeroes;
    }

    if ($pbs_name) {
        $opts .= ",backing=$pbs_name";
        $opts .= ",auto-remove=on";
    }

    my $file_param = "file";
    if ($pbs_name) {
        # non-rbd drivers require the underlying file to be a seperate block
        # node, so add a second .file indirection
        $file_param .= ".file" if !$is_rbd;
        $file_param .= ".filename";
    }
    my $pathinfo = $path ? "$file_param=$path," : '';

    return "${pathinfo}if=none,id=drive-${drive_id}${opts}";
}
1713
# Build the property list of a read-only 'pbs' block device node named
# $pbs_name from $pbs_conf (repository, snapshot, archive and the optional
# namespace and keyfile).
sub print_pbs_blockdev {
    my ($pbs_conf, $pbs_name) = @_;

    my @props = (
        "driver=pbs",
        "node-name=$pbs_name",
        "read-only=on",
        "repository=$pbs_conf->{repository}",
    );
    push @props, "namespace=$pbs_conf->{namespace}" if $pbs_conf->{namespace};
    push @props, "snapshot=$pbs_conf->{snapshot}", "archive=$pbs_conf->{archive}";
    push @props, "keyfile=$pbs_conf->{keyfile}" if $pbs_conf->{keyfile};

    return join(',', @props);
}
1724
# Build the device string for the guest-visible side of a NIC.
#
# The model 'virtio' is mapped to 'virtio-net-pci'. An MTU is only applied to
# VirtIO NICs attached to a bridge (1 means "use the bridge MTU"); it is
# ignored with a warning otherwise. With $use_old_bios_files set, the model's
# PXE ROM file is added.
sub print_netdevice_full {
    my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type) = @_;

    my $is_virtio = $net->{model} eq 'virtio';
    my $device = $is_virtio ? 'virtio-net-pci' : $net->{model};

    my $pciaddr = print_pci_addr("$netid", $bridges, $arch, $machine_type);
    my $devstr = "$device,mac=$net->{macaddr},netdev=$netid$pciaddr,id=$netid";

    if ($net->{queues} && $net->{queues} > 1 && $is_virtio) {
        # Consider we have N queues, the number of vectors needed is 2 * N + 2, i.e., one per in
        # and out of each queue plus one config interrupt and control vector queue
        my $vectors = $net->{queues} * 2 + 2;
        $devstr .= ",vectors=$vectors,mq=on";
    }

    if ($net->{bootindex}) {
        $devstr .= ",bootindex=$net->{bootindex}";
    }

    if (my $mtu = $net->{mtu}) {
        if ($is_virtio && $net->{bridge}) {
            my $bridge_mtu = PVE::Network::read_bridge_mtu($net->{bridge});
            if ($mtu == 1) {
                $mtu = $bridge_mtu;
            } else {
                die "netdev $netid: MTU '$mtu' is smaller than the IP minimum MTU '576'\n"
                    if $mtu < 576;
                die "netdev $netid: MTU '$mtu' is bigger than the bridge MTU '$bridge_mtu'\n"
                    if $mtu > $bridge_mtu;
            }
            $devstr .= ",host_mtu=$mtu";
        } else {
            warn "WARN: netdev $netid: ignoring MTU '$mtu', not using VirtIO or no bridge configured.\n";
        }
    }

    if ($use_old_bios_files) {
        my %romfile_for = (
            'virtio-net-pci' => 'pxe-virtio.rom',
            'e1000' => 'pxe-e1000.rom',
            'e1000e' => 'pxe-e1000e.rom',
            'ne2k' => 'pxe-ne2k_pci.rom',
            'pcnet' => 'pxe-pcnet.rom',
            'rtl8139' => 'pxe-rtl8139.rom',
        );
        if (my $romfile = $romfile_for{$device}) {
            $devstr .= ",romfile=$romfile";
        }
    }

    return $devstr;
}
1779
# Build the network backend string for the NIC $netid ("net<N>").
#
# A NIC with a bridge gets a tap backend named "tap<vmid>i<N>" using the
# pve-bridge scripts (the hotplug variant if $hotplug is set); without a
# bridge a 'user' backend is used. Dies if $netid is malformed or out of
# range, or if the resulting interface name is too long.
sub print_netdev_full {
    my ($vmid, $conf, $arch, $net, $netid, $hotplug) = @_;

    # reject malformed IDs up front instead of silently creating "tap<vmid>i"
    my ($index) = $netid =~ m/^net(\d+)$/
        or die "got strange net id '$netid'\n";
    my $i = int($index);

    die "got strange net id '$i'\n" if $i >= ${MAX_NETS};

    my $ifname = "tap${vmid}i$i";

    # kvm uses TUNSETIFF ioctl, and that limits ifname length
    die "interface name '$ifname' is too long (max 15 character)\n"
        if length($ifname) >= 16;

    my $vhostparam = '';
    if (is_native($arch)) {
        $vhostparam = ',vhost=on' if kernel_has_vhost_net() && $net->{model} eq 'virtio';
    }

    my $vmname = $conf->{name} || "vm$vmid";

    my $netdev = "";
    my $script = $hotplug ? "pve-bridge-hotplug" : "pve-bridge";

    if ($net->{bridge}) {
        $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/$script"
            .",downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam";
    } else {
        $netdev = "type=user,id=$netid,hostname=$vmname";
    }

    # multiqueue is only passed on for VirtIO NICs
    $netdev .= ",queues=$net->{queues}" if ($net->{queues} && $net->{model} eq 'virtio');

    return $netdev;
}
1817
# maps the configured display type to the QEMU device name
my $vga_map = {
    'cirrus' => 'cirrus-vga',
    'std' => 'VGA',
    'vmware' => 'vmware-svga',
    'virtio' => 'virtio-vga',
    'virtio-gl' => 'virtio-vga-gl',
};

# Build the device string for a display adapter.
#
# $vga is the parsed 'vga' option, $id the optional index of an additional
# display (appended to the "vga" device id), and $qxlnum is true when QXL
# devices are requested. Dies if no device type can be determined or if the
# prerequisites of 'virtio-gl' are missing on the host.
sub print_vga_device {
    my ($conf, $vga, $arch, $machine_version, $machine, $id, $qxlnum, $bridges) = @_;

    my $type = $vga_map->{$vga->{type}};
    if ($arch eq 'aarch64' && defined($type) && $type eq 'virtio-vga') {
        $type = 'virtio-gpu';
    }
    my $vgamem_mb = $vga->{memory};

    my $max_outputs = '';
    if ($qxlnum) {
        $type = $id ? 'qxl' : 'qxl-vga';

        # The alternation is grouped so that both anchors apply to both
        # alternatives (it used to parse as "^l\d\d" or "other$").
        if (!$conf->{ostype} || $conf->{ostype} =~ m/^(?:l\d\d|other)$/) {
            # set max outputs so linux can have up to 4 qxl displays with one device
            if (min_version($machine_version, 4, 1)) {
                $max_outputs = ",max_outputs=4";
            }
        }
    }

    die "no devicetype for $vga->{type}\n" if !$type;

    my $memory = "";
    if ($vgamem_mb) {
        if ($vga->{type} =~ /^virtio/) {
            my $bytes = PVE::Tools::convert_size($vgamem_mb, "mb" => "b");
            $memory = ",max_hostmem=$bytes";
        } elsif ($qxlnum) {
            # from https://www.spice-space.org/multiple-monitors.html
            $memory = ",vgamem_mb=$vga->{memory}";
            my $ram = $vgamem_mb * 4;
            my $vram = $vgamem_mb * 2;
            $memory .= ",ram_size_mb=$ram,vram_size_mb=$vram";
        } else {
            $memory = ",vgamem_mb=$vga->{memory}";
        }
    } elsif ($qxlnum && $id) {
        $memory = ",ram_size=67108864,vram_size=33554432";
    }

    # std VGA for Windows guests gets edid=off unless the BIOS is OVMF
    my $edidoff = "";
    if ($type eq 'VGA' && windows_version($conf->{ostype})) {
        $edidoff=",edid=off" if (!defined($conf->{bios}) || $conf->{bios} ne 'ovmf');
    }

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $vgaid = "vga" . ($id // '');
    my $pciaddr;
    if ($q35 && $vgaid eq 'vga') {
        # the first display uses pcie.0 bus on q35 machines
        $pciaddr = print_pcie_addr($vgaid, $bridges, $arch, $machine);
    } else {
        $pciaddr = print_pci_addr($vgaid, $bridges, $arch, $machine);
    }

    if ($vga->{type} eq 'virtio-gl') {
        my $base = '/usr/lib/x86_64-linux-gnu/lib';
        die "missing libraries for '$vga->{type}' detected! Please install 'libgl1' and 'libegl1'\n"
            if !-e "${base}EGL.so.1" || !-e "${base}GL.so.1";

        die "no DRM render node detected (/dev/dri/renderD*), no GPU? - needed for '$vga->{type}' display\n"
            if !PVE::Tools::dir_glob_regex('/dev/dri/', "renderD.*");
    }

    return "$type,id=${vgaid}${memory}${max_outputs}${pciaddr}${edidoff}";
}
1893
# Parse a ';' separated list of numbers and ranges ("0-3;5") into an array
# reference of [ $first, $last ] pairs; $last is undef for a single number.
# Dies on malformed parts and on ranges whose end is smaller than the start.
sub parse_number_sets {
    my ($set) = @_;

    my @ranges;
    for my $part (split(/;/, $set)) {
        my ($first, $last) = $part =~ /^\s*(\d+)(?:-(\d+))?\s*$/
            or die "invalid range: $part\n";

        die "invalid range: $part ($last < $first)\n"
            if defined($last) && $last < $first;

        push @ranges, [ $first, $last ];
    }

    return \@ranges;
}
1907
# Parse a 'numa<N>' property string; the 'cpus' and 'hostnodes' values are
# additionally expanded into number sets (see parse_number_sets).
sub parse_numa {
    my ($data) = @_;

    my $numa = parse_property_string($numa_fmt, $data);

    for my $key (qw(cpus hostnodes)) {
        next if !defined($numa->{$key});
        $numa->{$key} = parse_number_sets($numa->{$key});
    }

    return $numa;
}
1916
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
#
# Parse a 'net<N>' property string. Parse errors are only warned about and
# make the function return nothing. A missing MAC address is filled in with a
# random one using the 'mac_prefix' from datacenter.cfg.
sub parse_net {
    my ($data) = @_;

    my $net = eval { parse_property_string($net_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    $net->{macaddr} //= do {
        my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
        PVE::Tools::random_ether_addr($dc->{mac_prefix});
    };

    return $net;
}
1932
# ipconfigX ip=cidr,gw=ip,ip6=cidr,gw6=ip
#
# Parse an 'ipconfig<N>' property string. Parse errors and inconsistent
# gateway settings are warned about and make the function return nothing.
# If neither an IPv4 nor an IPv6 address is set, DHCP is returned for both.
sub parse_ipconfig {
    my ($data) = @_;

    my $res = eval { parse_property_string($ipconfig_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    # warn about an inconsistent setting and yield "nothing" to the caller
    my $reject = sub {
        my ($msg) = @_;
        warn $msg;
        return;
    };

    return $reject->('gateway specified without specifying an IP address')
        if $res->{gw} && !$res->{ip};
    return $reject->('IPv6 gateway specified without specifying an IPv6 address')
        if $res->{gw6} && !$res->{ip6};
    return $reject->('gateway specified together with DHCP')
        if $res->{gw} && $res->{ip} eq 'dhcp';
    # gw6 + auto/dhcp
    return $reject->("IPv6 gateway specified together with $res->{ip6} address")
        if $res->{gw6} && $res->{ip6} !~ /^$IPV6RE/;

    return { ip => 'dhcp', ip6 => 'dhcp' } if !$res->{ip} && !$res->{ip6};

    return $res;
}
1967
# Render a parsed NIC hash back into its 'net<N>' property string.
sub print_net {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $net_fmt);
}
1973
# Re-render every parsable 'net<N>' entry of $settings in place. As parse_net()
# fills in a missing MAC address, each such NIC ends up with an explicit one.
sub add_random_macs {
    my ($settings) = @_;

    for my $opt (grep { m/^net(\d+)$/ } keys %$settings) {
        my $net = parse_net($settings->{$opt}) or next;
        $settings->{$opt} = print_net($net);
    }
}
1984
# Returns 1 if the storage layer reports $vmid as owner of the volume $volid.
# Absolute paths, unresolvable volumes and foreign owners all yield nothing.
sub vm_is_volid_owner {
    my ($storecfg, $vmid, $volid) = @_;

    return if $volid =~ m|^/|;

    # errors while resolving the volume are deliberately ignored
    my $owner = eval { (PVE::Storage::path($storecfg, $volid))[1] };

    return 1 if $owner && ($owner == $vmid);
    return;
}
1998
# Handle a drive that is no longer referenced by the VM config: a cloud-init
# volume is freed right away (errors are only warned about), a CD-ROM drive is
# ignored, and any other volume owned by the VM is recorded as unused in $conf.
sub vmconfig_register_unused_drive {
    my ($storecfg, $vmid, $conf, $drive) = @_;

    if (drive_is_cloudinit($drive)) {
        eval { PVE::Storage::vdisk_free($storecfg, $drive->{file}) };
        warn $@ if $@;
    } elsif (!drive_is_cdrom($drive)) {
        if (vm_is_volid_owner($storecfg, $vmid, $drive->{file})) {
            PVE::QemuConfig->add_unused_volume($conf, $drive->{file}, $vmid);
        }
    }
}
2012
# smbios: [manufacturer=str][,product=str][,version=str][,serial=str][,uuid=uuid][,sku=str][,family=str][,base64=bool]
my $smbios1_fmt = {
    uuid => {
        type => 'string',
        pattern => '[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}',
        format_description => 'UUID',
        description => "Set SMBIOS1 UUID.",
        optional => 1,
    },
    base64 => {
        type => 'boolean',
        description => 'Flag to indicate that the SMBIOS values are base64 encoded',
        optional => 1,
    },
};

# All remaining fields share the same schema and differ in their description only.
for my $field (
    [version => 'version'],
    [serial => 'serial number'],
    [manufacturer => 'manufacturer'],
    [product => 'product ID'],
    [sku => 'SKU string'],
    [family => 'family string'],
) {
    my ($key, $label) = @$field;
    $smbios1_fmt->{$key} = {
        type => 'string',
        pattern => '[A-Za-z0-9+\/]+={0,2}',
        format_description => 'Base64 encoded string',
        description => "Set SMBIOS1 $label.",
        optional => 1,
    };
}
2070
# Parse an 'smbios1' property string according to $smbios1_fmt.
# Returns the parsed hash; on a parse error a warning is emitted and undef is
# returned.
sub parse_smbios1 {
    my ($data) = @_;

    my $parsed = eval { parse_property_string($smbios1_fmt, $data) };
    if ($@) {
        warn $@;
    }

    return $parsed;
}
2078
# Serialize a parsed SMBIOS1 hash back into its property-string form.
sub print_smbios1 {
    my $smbios1 = shift;

    return PVE::JSONSchema::print_property_string($smbios1, $smbios1_fmt);
}

# make the schema available to the JSON schema layer under a format name
PVE::JSONSchema::register_format('pve-qm-smbios1', $smbios1_fmt);
2085
# Parse a 'watchdog' property string according to $watchdog_fmt.
# Returns nothing for an empty/false value; on a parse error a warning is
# emitted and undef is returned.
sub parse_watchdog {
    my ($value) = @_;

    return unless $value;

    my $parsed = eval { parse_property_string($watchdog_fmt, $value) };
    if ($@) {
        warn $@;
    }

    return $parsed;
}
2095
# Parse the 'agent' option of a VM config according to $agent_fmt.
# Always returns a hash reference: an empty one when no agent is configured,
# when parsing failed (a warning is emitted) or when the agent is not enabled.
sub parse_guest_agent {
    my ($conf) = @_;

    my $agent_conf = $conf->{agent};
    return {} unless defined($agent_conf);

    my $res = eval { parse_property_string($agent_fmt, $agent_conf) };
    warn $@ if $@;

    # a disabled agent hides all other potentially set properties
    return $res->{enabled} ? $res : {};
}
2108
# Look up a single property ($key) of the guest agent configuration.
# Returns an explicit undef when the config has no 'agent' option at all;
# otherwise whatever parse_guest_agent() yields for that key.
sub get_qga_key {
    my ($conf, $key) = @_;

    if (defined($conf->{agent})) {
        my $agent = parse_guest_agent($conf);
        return $agent->{$key};
    }

    # explicit undef (not a bare return) so list-context callers still get one element
    return undef;
}
2116
# Parse a 'vga' property string according to $vga_fmt.
# An empty/false value yields an empty hash; on a parse error a warning is
# emitted and undef is returned.
sub parse_vga {
    my ($value) = @_;

    return {} unless $value;

    my $parsed = eval { parse_property_string($vga_fmt, $value) };
    if ($@) {
        warn $@;
    }

    return $parsed;
}
2125
# Parse an RNG device property string according to $rng_fmt.
# Returns nothing for an empty/false value; on a parse error a warning is
# emitted and undef is returned.
sub parse_rng {
    my ($value) = @_;

    return unless $value;

    my $parsed = eval { parse_property_string($rng_fmt, $value) };
    if ($@) {
        warn $@;
    }

    return $parsed;
}
2135
# Parse the 'meta' property string of a VM config according to $meta_info_fmt.
# Returns nothing for an empty/false value; on a parse error a warning is
# emitted and undef is returned.
sub parse_meta_info {
    my ($value) = @_;

    return unless $value;

    my $parsed = eval { parse_property_string($meta_info_fmt, $value) };
    if ($@) {
        warn $@;
    }

    return $parsed;
}
2145
# Build the 'meta' property string for a newly created VM: it records the
# result of kvm_user_version() as 'creation-qemu' and the current epoch time
# (as a string) as 'ctime'.
sub new_meta_info_string {
    my () = @_; # for now do not allow to override any value

    my $meta = {
        'creation-qemu' => kvm_user_version(),
        ctime => "". int(time()),
    };

    return PVE::JSONSchema::print_property_string($meta, $meta_info_fmt);
}
2157
# Return extra QEMU command line arguments (array ref) needed to keep the
# virtual hardware stable for VMs that always run the latest machine version
# but were created before a machine version transition that affected the
# hardware layout. Returns nothing if no fixup applies.
#
# We do not want to pin the machine version for non-windows OS types, so the
# affected behaviour is switched off explicitly instead.
sub qemu_created_version_fixups {
    my ($conf, $forcemachine, $kvmver) = @_;

    my $meta = parse_meta_info($conf->{meta}) // {};
    my $forced_vers = PVE::QemuServer::Machine::extract_version($forcemachine);

    # only VMs with a non-versioned machine type are affected
    my $machine = $conf->{machine};
    return if defined($machine) && $machine !~ m/^(?:pc|q35|virt)$/;

    # only VMs created before 6.1 (or without creation info) are affected
    my $creation_vers = $meta->{'creation-qemu'};
    return if defined($creation_vers) && min_version($creation_vers, 6, 1);

    # handle snapshot-rollback/migrations
    return if $forced_vers && !min_version($forced_vers, 6, 1);

    # only need to apply the change since 6.1
    return if !min_version($kvmver, 6, 1);

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    if ($q35 && $conf->{ostype} && $conf->{ostype} eq 'l26') {
        # this changed to default-on in Q 6.1 for q35 machines, it will mess with PCI slot view
        # and thus with the predictable interface naming of systemd
        return ['-global', 'ICH9-LPC.acpi-pci-hotplug-with-bridge-support=off'];
    }

    return;
}
2182
PVE::JSONSchema::register_format('pve-qm-usb-device', \&verify_usb_device);
# Format verifier for 'pve-qm-usb-device': returns $value when
# parse_usb_device() accepts it. Otherwise it dies, or returns nothing if
# $noerr is set.
sub verify_usb_device {
    my ($value, $noerr) = @_;

    if (!parse_usb_device($value)) {
        return if $noerr;
        die "unable to parse usb device\n";
    }

    return $value;
}
2193
# Add the JSON schema properties used by the create and set API calls to $prop
# and return it. Options that are only maintained internally are left out.
# With $with_disk_alloc set, drive options use the schema variant that allows
# allocating new disks.
sub json_config_properties {
    my ($prop, $with_disk_alloc) = @_;

    # options that must not be settable through the API
    my %internal_only = map { $_ => 1 } qw(
        parent
        snaptime
        vmstate
        runningmachine
        runningcpu
        meta
    );

    for my $opt (keys %$confdesc) {
        next if $internal_only{$opt};

        my $use_alloc_desc = $with_disk_alloc && is_valid_drivename($opt);
        $prop->{$opt} = $use_alloc_desc
            ? $PVE::QemuServer::Drive::drivedesc_hash_with_alloc->{$opt}
            : $confdesc->{$opt};
    }

    return $prop;
}
2219
# Properties that we can read from an OVF file: one optional disk image
# property per valid drive name, plus 'cores', 'memory' and 'name'.
sub json_ovf_properties {
    my %prop = map {
        my $device = $_;
        $device => {
            type => 'string',
            format => 'pve-volume-id-or-absolute-path',
            description => "Disk image that gets imported to $device",
            optional => 1,
        };
    } PVE::QemuServer::Drive::valid_drive_names();

    $prop{cores} = {
        type => 'integer',
        description => "The number of CPU cores.",
        optional => 1,
    };
    $prop{memory} = {
        type => 'integer',
        description => "Amount of RAM for the VM in MB.",
        optional => 1,
    };
    $prop{name} = {
        type => 'string',
        description => "Name of the VM.",
        optional => 1,
    };

    return \%prop;
}
2251
# Hand out a deep copy of $confdesc_cloudinit (used to generate documentation),
# so callers cannot modify the original schema.
sub cloudinit_config_properties {
    my $copy = dclone($confdesc_cloudinit);

    return $copy;
}
2257
# Validate $value against the type that $confdesc declares for option $key and
# return the normalized value:
#  - boolean: 1 or 0 (accepts 1/on/yes/true and 0/off/no/false, case-insensitive)
#  - integer: the numeric value of a string of digits
#  - number:  the value itself, if it is a non-negative decimal number
#  - string:  the value itself; if the option declares a format it is checked
#             via PVE::JSONSchema::check_format, otherwise one pair of
#             surrounding double quotes is stripped
# Dies on unknown options, undefined values, values containing line breaks and
# failed type checks.
sub check_type {
    my ($key, $value) = @_;

    my $desc = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$desc;

    my $type = $desc->{type};

    die "got undefined value\n" if !defined($value);

    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    if ($type eq 'string') {
        my $fmt = $desc->{format};
        if ($fmt) {
            PVE::JSONSchema::check_format($fmt, $value);
            return $value;
        }
        $value =~ s/^\"(.*)\"$/$1/;
        return $value;
    }

    die "internal error"
}
2294
# Destroy VM $vmid: free all volumes owned by it and remove its configuration.
#
# $skiplock           - skip the config lock check
# $replacement_conf   - if defined, this config is written instead of removing
#                       the config file
# $purge_unreferenced - additionally free every image that the storage layer
#                       lists for $vmid, even if the config does not mention it
#
# Dies if the VM is a template with a base volume still used by a linked clone.
# Failures to free single volumes only produce warnings.
sub destroy_vm {
    my ($storecfg, $vmid, $skiplock, $replacement_conf, $purge_unreferenced) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    PVE::QemuConfig->check_lock($conf) unless $skiplock;

    if ($conf->{template}) {
        # check if any base image is still used by a linked clone
        my $assert_base_unused = sub {
            my ($ds, $drive) = @_;
            return if drive_is_cdrom($drive);

            my $volid = $drive->{file};
            return if !$volid || $volid =~ m|^/|;

            die "base volume '$volid' is still in use by linked cloned\n"
                if PVE::Storage::volume_is_base_and_used($storecfg, $volid);
        };
        PVE::QemuConfig->foreach_volume_full($conf, { include_unused => 1 }, $assert_base_unused);
    }

    # volumes already handled, so the same volume is never freed twice
    my %freed;
    my $remove_owned_drive = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return unless $volid && $volid !~ m|^/|;
        return if $freed{$volid};

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return unless $path && $owner && ($owner == $vmid);

        $freed{$volid} = 1;
        eval { PVE::Storage::vdisk_free($storecfg, $volid) };
        warn "Could not remove disk '$volid', check manually: $@" if $@;
    };

    # only remove disks owned by this VM (referenced in the config)
    my $include_opts = {
        include_unused => 1,
        extra_keys => ['vmstate'],
    };
    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $remove_owned_drive);

    # saved VM state volumes of snapshots
    for my $snap (values %{$conf->{snapshots}}) {
        my $vmstate = $snap->{vmstate};
        next unless defined($vmstate);

        my $drive = PVE::QemuConfig->parse_volume('vmstate', $vmstate, 1);
        next unless defined($drive);

        $remove_owned_drive->('vmstate', $drive);
    }

    PVE::QemuConfig->foreach_volume_full($conf->{pending}, $include_opts, $remove_owned_drive);

    if ($purge_unreferenced) { # also remove unreferenced disk
        my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid, undef, 'images');
        my $free_volume = sub {
            my ($volid, $sid, $volname, $d) = @_;
            eval { PVE::Storage::vdisk_free($storecfg, $volid) };
            warn $@ if $@;
        };
        PVE::Storage::foreach_volid($vmdisks, $free_volume);
    }

    if (defined $replacement_conf) {
        PVE::QemuConfig->write_config($vmid, $replacement_conf);
    } else {
        PVE::QemuConfig->destroy_config($vmid);
    }
}
2365
# Parse the raw content ($raw) of a VM configuration file.
#
# $filename must end in '/qemu-server/<vmid>.conf'; the VMID is taken from it.
# With $strict set, parse errors are fatal; otherwise they are only warned
# about and the offending line is skipped.
#
# Returns nothing if $raw is undefined. Otherwise returns a hash with the
# options of the main section plus:
#   digest    - SHA1 hex digest of $raw
#   pending   - options of the [PENDING] section
#   snapshots - one hash of options per snapshot section, keyed by name
# Comment lines ('#...') and 'description:' lines are collected into the
# 'description' of the section they belong to.
sub parse_vm_config {
    my ($filename, $raw, $strict) = @_;

    return if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
        pending => {},
    };

    my $handle_error = sub {
        my ($msg) = @_;

        die $msg if $strict;
        warn $msg;
    };

    $filename =~ m|/qemu-server/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    my $conf = $res; # the section hash that currently receives parsed options
    my $descr;
    my $section = '';

    # store the description collected so far in the current section
    my $flush_description = sub {
        if (defined($descr)) {
            $descr =~ s/\s+$//;
            $conf->{description} = $descr;
        }
        $descr = undef;
    };

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        if ($line =~ m/^\[PENDING\]\s*$/i) {
            $section = 'pending';
            $flush_description->();
            $conf = $res->{$section} = {};
            next;
        } elsif ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            $section = $1;
            $flush_description->();
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # anchored like every other key, so that a line of another option
            # that merely ends in 'snapstate: ...' is not mistaken for it
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            # 'args' is taken verbatim, without type checking
            my $key = $1;
            my $value = $2;
            $conf->{$key} = $value;
        } elsif ($line =~ m/^delete:\s*(.*\S)\s*$/) {
            my $value = $1;
            if ($section eq 'pending') {
                $conf->{delete} = $value; # we parse this later
            } else {
                $handle_error->("vm $vmid - property 'delete' is only allowed in [PENDING]\n");
            }
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(.+?)\s*$/) {
            my $key = $1;
            my $value = $2;
            eval { $value = check_type($key, $value); };
            if ($@) {
                $handle_error->("vm $vmid - unable to parse value of '$key' - $@");
            } else {
                # legacy alias
                $key = 'ide2' if $key eq 'cdrom';
                my $fmt = $confdesc->{$key}->{format};
                if ($fmt && $fmt =~ /^pve-qm-(?:ide|scsi|virtio|sata)$/) {
                    # normalize the drive's file name to a volume ID
                    my $v = parse_drive($key, $value);
                    if (my $volid = filename_to_volume_id($vmid, $v->{file}, $v->{media})) {
                        $v->{file} = $volid;
                        $value = print_drive($v);
                    } else {
                        $handle_error->("vm $vmid - unable to parse value of '$key'\n");
                        next;
                    }
                }

                $conf->{$key} = $value;
            }
        } else {
            $handle_error->("vm $vmid - unable to parse config: $line\n");
        }
    }

    $flush_description->();
    delete $res->{snapstate}; # just to be sure

    return $res;
}
2478
# Serialize the VM configuration hash $conf into the raw config file format and
# return it as a string. $filename is not used.
#
# $conf is normalized in place on the way:
#  - 'snapstate' is removed from the main section
#  - the legacy 'cdrom' option is turned into 'ide2' (dies if both are set)
#  - the legacy 'smp' option is dropped in favour of 'sockets'
#  - all values are passed through check_type() (dies on invalid values)
#  - 'unusedX' entries whose volume is referenced by a drive of the main or
#    pending section are removed
#
# The output consists of the main section, an optional [PENDING] section and
# one section per snapshot, each with its description as leading '#' comments.
sub write_vm_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    if ($conf->{cdrom}) {
        die "option ide2 conflicts with cdrom\n" if $conf->{ide2};
        $conf->{ide2} = $conf->{cdrom};
        delete $conf->{cdrom};
    }

    # we do not use 'smp' any longer
    if ($conf->{sockets}) {
        delete $conf->{smp};
    } elsif ($conf->{smp}) {
        $conf->{sockets} = $conf->{smp};
        delete $conf->{cores};
        delete $conf->{smp};
    }

    my $used_volids = {};

    # type-check all options of one section in place and, for non-snapshot
    # sections, remember the volumes referenced by drives
    my $cleanup_config = sub {
        my ($cref, $pending, $snapname) = @_;

        foreach my $key (keys %$cref) {
            next if $key eq 'digest' || $key eq 'description' || $key eq 'snapshots' ||
                $key eq 'snapstate' || $key eq 'pending';
            my $value = $cref->{$key};
            if ($key eq 'delete') {
                die "property 'delete' is only allowed in [PENDING]\n"
                    if !$pending;
                # fixme: check syntax?
                next;
            }
            eval { $value = check_type($key, $value); };
            die "unable to parse value of '$key' - $@" if $@;

            $cref->{$key} = $value;

            if (!$snapname && is_valid_drivename($key)) {
                my $drive = parse_drive($key, $value);
                $used_volids->{$drive->{file}} = 1 if $drive && $drive->{file};
            }
        }
    };

    $cleanup_config->($conf);

    $cleanup_config->($conf->{pending}, 1);

    foreach my $snapname (keys %{$conf->{snapshots}}) {
        die "internal error: snapshot name '$snapname' is forbidden" if lc($snapname) eq 'pending';
        $cleanup_config->($conf->{snapshots}->{$snapname}, undef, $snapname);
    }

    # remove 'unusedX' settings if we re-add a volume
    foreach my $key (keys %$conf) {
        my $value = $conf->{$key};
        if ($key =~ m/^unused/ && $used_volids->{$value}) {
            delete $conf->{$key};
        }
    }

    # render one section: description comments first, then sorted 'key: value' lines
    my $generate_raw_config = sub {
        my ($conf, $pending) = @_;

        my $raw = '';

        # add description as comment to top of file
        if (defined(my $descr = $conf->{description})) {
            if ($descr) {
                foreach my $cl (split(/\n/, $descr)) {
                    $raw .= '#' . PVE::Tools::encode_text($cl) . "\n";
                }
            } else {
                # an empty description is only written out in the pending section
                $raw .= "#\n" if $pending;
            }
        }

        foreach my $key (sort keys %$conf) {
            next if $key =~ /^(digest|description|pending|snapshots)$/;
            $raw .= "$key: $conf->{$key}\n";
        }
        return $raw;
    };

    my $raw = $generate_raw_config->($conf);

    if (scalar(keys %{$conf->{pending}})){
        $raw .= "\n[PENDING]\n";
        $raw .= $generate_raw_config->($conf->{pending}, 1);
    }

    foreach my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n";
        $raw .= $generate_raw_config->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
2580
# Collect the static default values declared in our JSON schema configuration
# ($confdesc). Returns a hash ref mapping option name to default, containing
# only the options that define one.
sub load_defaults {
    my %defaults;

    for my $key (keys %$confdesc) {
        my $default = $confdesc->{$key}->{default};
        $defaults{$key} = $default if defined($default);
    }

    return \%defaults;
}
2594
# List the QEMU guests that the cluster VM list assigns to the local node.
# Returns a hash ref of the form { $vmid => { exists => 1 } }; it is empty if
# no VM list is available.
sub config_list {
    my $vmlist = PVE::Cluster::get_vmlist();
    my $res = {};
    return $res unless $vmlist && $vmlist->{ids};

    my $ids = $vmlist->{ids};
    my $nodename = nodename();

    for my $vmid (keys %$ids) {
        my $entry = $ids->{$vmid};

        my $on_this_node = $entry->{node} && $entry->{node} eq $nodename;
        my $is_qemu = $entry->{type} && $entry->{type} eq 'qemu';
        next unless $on_this_node && $is_qemu;

        $res->{$vmid}->{exists} = 1;
    }

    return $res;
}
2610
# Test if the VM uses local resources (to prevent migration).
#
# Returns an array ref with the names of all config options that refer to
# local resources. Dies if there are any, unless $noerr is set.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    # 'hostusb' and 'hostpci' are the old, un-indexed syntax
    my @loc_res = grep { $conf->{$_} } qw(hostusb hostpci ivshmem);

    for my $k (keys %$conf) {
        my $value = $conf->{$k};

        # spice USB redirection is not bound to the host
        next if $k =~ m/^usb/ && ($value =~ m/^spice(?![^,])/);
        # sockets are safe: they will be recreated on the target side post-migrate
        next if $k =~ m/^serial/ && ($value eq 'socket');

        push @loc_res, $k if $k =~ m/^(usb|hostpci|serial|parallel)\d+$/;
    }

    die "VM uses local resources\n" if scalar @loc_res && !$noerr;

    return \@loc_res;
}
2633
# Check if the storages used by the VM are available on all nodes involved
# (used by migrate): for each volume with a storage ID, the storage must be
# enabled both locally and on $node, and must allow the volume's content type.
# Dies if any check fails.
sub check_storage_availability {
    my ($storecfg, $conf, $node) = @_;

    my $check_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file} or return;

        my ($sid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return unless $sid;

        # check if storage is available on both nodes
        my $scfg = PVE::Storage::storage_check_enabled($storecfg, $sid);
        PVE::Storage::storage_check_enabled($storecfg, $sid, $node);

        my ($vtype) = PVE::Storage::parse_volname($storecfg, $volid);

        die "$volid: content type '$vtype' is not available on storage '$sid'\n"
            if !$scfg->{content}->{$vtype};
    };

    PVE::QemuConfig->foreach_volume($conf, $check_volume);
}
2657
# List nodes where all VM images are available (used by has_feature API).
#
# Starts with all cluster nodes and narrows the set down per volume:
#  - a disabled storage rules out every node
#  - a storage restricted to certain nodes rules out all others
#  - a non-shared storage rules out every node but the local one
# Returns a hash ref { $node => 1 } of the remaining nodes.
sub shared_nodes {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => 1 } @$nodelist };
    my $nodename = nodename();

    my $restrict_to_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file} or return;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return unless $storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        if ($scfg->{disable}) {
            $nodehash = {};
        } elsif (my $avail = $scfg->{nodes}) {
            delete @{$nodehash}{ grep { !$avail->{$_} } keys %$nodehash };
        } elsif (!$scfg->{shared}) {
            delete @{$nodehash}{ grep { $_ ne $nodename } keys %$nodehash };
        }
    };

    PVE::QemuConfig->foreach_volume($conf, $restrict_to_volume);

    return $nodehash
}
2691
# Determine, for every cluster node, which storages used by the VM are not
# available there (because the storage is disabled or restricted to other
# nodes).
#
# Returns a hash ref keyed by node name. Nodes lacking a storage get an
# 'unavailable_storages' entry holding the sorted list of storage IDs; the
# hash of all other nodes stays empty.
sub check_local_storage_availability {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => {} } @$nodelist };

    my $record_unavailable = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file} or return;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return unless $storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        my $avail = $scfg->{nodes};

        # an enabled storage without node restriction is available everywhere
        return unless $scfg->{disable} || $avail;

        for my $node (keys %$nodehash) {
            next if !$scfg->{disable} && $avail->{$node};
            $nodehash->{$node}->{unavailable_storages}->{$storeid} = 1;
        }
    };

    PVE::QemuConfig->foreach_volume($conf, $record_unavailable);

    # turn the collected sets into sorted lists
    for my $info (values %$nodehash) {
        my $unavail = $info->{unavailable_storages} or next;
        $info->{unavailable_storages} = [ sort keys %$unavail ];
    }

    return $nodehash
}
2730
# Compat only, use assert_config_exists_on_node and vm_running_locally where possible
#
# Unless $nocheck is set, first asserts that the config of $vmid exists on
# $node. Returns the result of vm_running_locally($vmid).
sub check_running {
    my ($vmid, $nocheck, $node) = @_;

    unless ($nocheck) {
        PVE::QemuConfig::assert_config_exists_on_node($vmid, $node);
    }

    return PVE::QemuServer::Helpers::vm_running_locally($vmid);
}
2738
# Return the list of local VMs (see config_list), with the 'pid' set for every
# VM that has a '<vmid>.pid' file in the run directory and for which
# check_running() reports a PID. If the run directory cannot be opened, the
# list is returned without any PIDs.
sub vzlist {
    my $vzlist = config_list();

    my $dir = IO::Dir->new($PVE::QemuServer::Helpers::var_run_tmpdir);
    return $vzlist if !$dir;

    while (defined(my $entry = $dir->read)) {
        next unless $entry =~ m/^(\d+)\.pid$/;
        my $vmid = $1;

        # ignore pid files of VMs that are not in the list
        next unless defined($vzlist->{$vmid});

        my $pid = check_running($vmid);
        $vzlist->{$vmid}->{pid} = $pid if $pid;
    }

    return $vzlist;
}
2756
# JSON schema description of per-VM status properties. Only 'vmid' and
# 'status' are mandatory here; all other properties are optional. The
# 'renderer' hints name how a value should be displayed ('bytes', 'duration').
# NOTE(review): presumably used as the return schema of API calls that expose
# the result of vmstatus() - confirm against the API modules. vmstatus() sets
# further keys that are not described here.
our $vmstatus_return_properties = {
    vmid => get_standard_option('pve-vmid'),
    status => {
        description => "Qemu process status.",
        type => 'string',
        enum => ['stopped', 'running'],
    },
    maxmem => {
        description => "Maximum memory in bytes.",
        type => 'integer',
        optional => 1,
        renderer => 'bytes',
    },
    maxdisk => {
        description => "Root disk size in bytes.",
        type => 'integer',
        optional => 1,
        renderer => 'bytes',
    },
    name => {
        description => "VM name.",
        type => 'string',
        optional => 1,
    },
    qmpstatus => {
        description => "Qemu QMP agent status.",
        type => 'string',
        optional => 1,
    },
    pid => {
        description => "PID of running qemu process.",
        type => 'integer',
        optional => 1,
    },
    uptime => {
        description => "Uptime.",
        type => 'integer',
        optional => 1,
        renderer => 'duration',
    },
    cpus => {
        description => "Maximum usable CPUs.",
        type => 'number',
        optional => 1,
    },
    lock => {
        description => "The current config lock, if any.",
        type => 'string',
        optional => 1,
    },
    tags => {
        description => "The current configured tags, if any",
        type => 'string',
        optional => 1,
    },
    'running-machine' => {
        description => "The currently running machine type (if running).",
        type => 'string',
        optional => 1,
    },
    'running-qemu' => {
        description => "The currently running QEMU version (if running).",
        type => 'string',
        optional => 1,
    },
};
2823
# Cache of the last CPU usage sample per PID ({ time, used, cpu }), kept
# across calls of vmstatus() to compute CPU usage from the difference between
# two samples. Entries are never removed in this function.
my $last_proc_pid_stat;

# get VM status information
# This must be fast and should not block ($full == false)
# We only query KVM using QMP if $full == true (this can be slow)
#
# Parameters:
#   $opt_vmid - optional; restricts the result to this VM
#   $full     - if true, additionally query running VMs via QMP (block
#               statistics, machine type, QEMU version, balloon info, status)
#               and via the 'query-proxmox-support' monitor command
#
# Returns a hash ref mapping VMID to a hash of status values.
sub vmstatus {
    my ($opt_vmid, $full) = @_;

    my $res = {};

    my $storecfg = PVE::Storage::config();

    my $list = vzlist();
    my $defaults = load_defaults();

    my ($uptime) = PVE::ProcFSTools::read_proc_uptime(1);

    my $cpucount = $cpuinfo->{cpus} || 1;

    # first pass: static information from the config, counters initialized to 0
    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);

        my $conf = PVE::QemuConfig->load_config($vmid);

        my $d = { vmid => int($vmid) };
        $d->{pid} = int($list->{$vmid}->{pid}) if $list->{$vmid}->{pid};

        # fixme: better status?
        $d->{status} = $list->{$vmid}->{pid} ? 'running' : 'stopped';

        my $size = PVE::QemuServer::Drive::bootdisk_size($storecfg, $conf);
        if (defined($size)) {
            $d->{disk} = 0; # no info available
            $d->{maxdisk} = $size;
        } else {
            $d->{disk} = 0;
            $d->{maxdisk} = 0;
        }

        # sockets * cores, capped at the host CPU count; an explicit 'vcpus'
        # setting overrides the result
        $d->{cpus} = ($conf->{sockets} || $defaults->{sockets})
            * ($conf->{cores} || $defaults->{cores});
        $d->{cpus} = $cpucount if $d->{cpus} > $cpucount;
        $d->{cpus} = $conf->{vcpus} if $conf->{vcpus};

        $d->{name} = $conf->{name} || "VM $vmid";
        # the configured value is multiplied by 1024*1024 to get bytes
        $d->{maxmem} = $conf->{memory} ? $conf->{memory}*(1024*1024)
            : $defaults->{memory}*(1024*1024);

        if ($conf->{balloon}) {
            $d->{balloon_min} = $conf->{balloon}*(1024*1024);
            $d->{shares} = defined($conf->{shares}) ? $conf->{shares}
                : $defaults->{shares};
        }

        $d->{uptime} = 0;
        $d->{cpu} = 0;
        $d->{mem} = 0;

        $d->{netout} = 0;
        $d->{netin} = 0;

        $d->{diskread} = 0;
        $d->{diskwrite} = 0;

        $d->{template} = 1 if PVE::QemuConfig->is_template($conf);

        $d->{serial} = 1 if conf_has_serial($conf);
        $d->{lock} = $conf->{lock} if $conf->{lock};
        $d->{tags} = $conf->{tags} if defined($conf->{tags});

        $res->{$vmid} = $d;
    }

    # network traffic: sum up the counters of all 'tap<vmid>i...' devices
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    foreach my $dev (keys %$netdev) {
        next if $dev !~ m/^tap([1-9]\d*)i/;
        my $vmid = $1;
        my $d = $res->{$vmid};
        next if !$d;

        # note the swap: the device's 'receive' counter is recorded as the
        # VM's netout and 'transmit' as its netin (presumably because the tap
        # device is the host side of the link)
        $d->{netout} += $netdev->{$dev}->{receive};
        $d->{netin} += $netdev->{$dev}->{transmit};

        if ($full) {
            $d->{nics}->{$dev}->{netout} = int($netdev->{$dev}->{receive});
            $d->{nics}->{$dev}->{netin} = int($netdev->{$dev}->{transmit});
        }

    }

    my $ctime = gettimeofday;

    # process statistics of running VMs: uptime, memory and CPU usage
    foreach my $vmid (keys %$list) {

        my $d = $res->{$vmid};
        my $pid = $d->{pid};
        next if !$pid;

        my $pstat = PVE::ProcFSTools::read_proc_pid_stat($pid);
        next if !$pstat; # not running

        my $used = $pstat->{utime} + $pstat->{stime};

        # NOTE(review): assumes $uptime and starttime are both in clock ticks,
        # converted to seconds by dividing by user_hz - confirm against
        # PVE::ProcFSTools
        $d->{uptime} = int(($uptime - $pstat->{starttime})/$cpuinfo->{user_hz});

        # rough estimate: scale maxmem by the rss/vsize ratio of the process
        if ($pstat->{vsize}) {
            $d->{mem} = int(($pstat->{rss}/$pstat->{vsize})*$d->{maxmem});
        }

        my $old = $last_proc_pid_stat->{$pid};
        if (!$old) {
            # first sample for this PID: nothing to compare with yet
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($ctime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        # only take a new sample if enough time passed, else reuse the old value
        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => $d->{cpu},
            };
        } else {
            $d->{cpu} = $old->{cpu};
        }
    }

    return $res if !$full;

    my $qmpclient = PVE::QMPClient->new();

    # callback for 'query-balloon': replaces the memory values estimated above
    my $ballooncb = sub {
        my ($vmid, $resp) = @_;

        my $info = $resp->{'return'};
        return if !$info->{max_mem};

        my $d = $res->{$vmid};

        # use memory assigned to VM
        $d->{maxmem} = $info->{max_mem};
        $d->{balloon} = $info->{actual};

        if (defined($info->{total_mem}) && defined($info->{free_mem})) {
            $d->{mem} = $info->{total_mem} - $info->{free_mem};
            $d->{freemem} = $info->{free_mem};
        }

        $d->{ballooninfo} = $info;
    };

    # callback for 'query-blockstats': per-device statistics plus totals
    my $blockstatscb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} || [];
        my $totalrdbytes = 0;
        my $totalwrbytes = 0;

        for my $blockstat (@$data) {
            $totalrdbytes = $totalrdbytes + $blockstat->{stats}->{rd_bytes};
            $totalwrbytes = $totalwrbytes + $blockstat->{stats}->{wr_bytes};

            # strip the 'drive-' prefix to get the name used as key
            $blockstat->{device} =~ s/drive-//;
            $res->{$vmid}->{blockstat}->{$blockstat->{device}} = $blockstat->{stats};
        }
        $res->{$vmid}->{diskread} = $totalrdbytes;
        $res->{$vmid}->{diskwrite} = $totalwrbytes;
    };

    # callback for 'query-machines'
    my $machinecb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} || [];

        $res->{$vmid}->{'running-machine'} =
            PVE::QemuServer::Machine::current_from_query_machines($data);
    };

    # callback for 'query-version'
    my $versioncb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} // {};
        my $version = 'unknown';

        if (my $v = $data->{qemu}) {
            $version = $v->{major} . "." . $v->{minor} . "." . $v->{micro};
        }

        $res->{$vmid}->{'running-qemu'} = $version;
    };

    # callback for 'query-status': queues the follow-up queries for the VM
    my $statuscb = sub {
        my ($vmid, $resp) = @_;

        $qmpclient->queue_cmd($vmid, $blockstatscb, 'query-blockstats');
        $qmpclient->queue_cmd($vmid, $machinecb, 'query-machines');
        $qmpclient->queue_cmd($vmid, $versioncb, 'query-version');
        # this fails if the balloon driver is not loaded, so this must be
        # the last command (following commands are aborted if this fails).
        $qmpclient->queue_cmd($vmid, $ballooncb, 'query-balloon');

        my $status = 'unknown';
        if (!defined($status = $resp->{'return'}->{status})) {
            warn "unable to get VM status\n";
            return;
        }

        $res->{$vmid}->{qmpstatus} = $resp->{'return'}->{status};
    };

    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        next if !$res->{$vmid}->{pid}; # not running
        $qmpclient->queue_cmd($vmid, $statuscb, 'query-status');
    }

    $qmpclient->queue_execute(undef, 2);

    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        next if !$res->{$vmid}->{pid}; #not running

        # we can't use the $qmpclient since it might have already aborted on
        # 'query-balloon', but this might also fail for older versions...
        my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };
        $res->{$vmid}->{'proxmox-support'} = $qemu_support // {};
    }

    # fall back to the process status where no QMP status was obtained
    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        $res->{$vmid}->{qmpstatus} = $res->{$vmid}->{status} if !$res->{$vmid}->{qmpstatus};
    }

    return $res;
}
3064
# Returns 1 if any of the 'serialN' options (N below $MAX_SERIAL_PORTS) is set
# in $conf, 0 otherwise.
sub conf_has_serial {
    my ($conf) = @_;

    for my $i (0 .. $MAX_SERIAL_PORTS - 1) {
        return 1 if $conf->{"serial$i"};
    }

    return 0;
}
3076
# Describe the audio device 'audio$id' ($id defaults to 0) of a VM config.
# Returns nothing if the option is not set; otherwise a hash ref with the
# device model ('dev'), the device ID ('dev_id'), the backend driver
# ('backend', 'spice' unless configured) and the backend ID ('backend_id').
sub conf_has_audio {
    my ($conf, $id) = @_;

    $id //= 0;

    my $audio = $conf->{"audio$id"};
    return unless defined($audio);

    my $props = parse_property_string($audio_fmt, $audio);
    my $backend = $props->{driver} // 'spice';

    return {
        dev => $props->{device},
        dev_id => "audiodev$id",
        backend => $backend,
        backend_id => "${backend}-backend${id}",
    };
}
3094
# Build the QEMU command line arguments for an audio device as described by
# conf_has_audio(). $audiopciaddr is appended verbatim to the device string.
# For machine versions >= 4.2 the devices are linked to the audio backend via
# 'audiodev='. Returns an array ref; dies on unknown device models.
sub audio_devs {
    my ($audio, $audiopciaddr, $machine_version) = @_;

    my $id = $audio->{dev_id};
    my $audiodev = min_version($machine_version, 4, 2)
        ? ",audiodev=$audio->{backend_id}"
        : "";

    my @devs;
    if ($audio->{dev} eq 'AC97') {
        push @devs, '-device', "AC97,id=${id}${audiopciaddr}$audiodev";
    } elsif ($audio->{dev} =~ /intel\-hda$/) {
        # the HDA controller itself plus two codecs attached to its bus
        push @devs,
            '-device', "$audio->{dev},id=${id}${audiopciaddr}",
            '-device', "hda-micro,id=${id}-codec0,bus=${id}.0,cad=0$audiodev",
            '-device', "hda-duplex,id=${id}-codec1,bus=${id}.0,cad=1$audiodev";
    } else {
        die "unkown audio device '$audio->{dev}', implement me!";
    }

    push @devs, '-audiodev', "$audio->{backend},id=$audio->{backend_id}";

    return \@devs;
}
3120
# Returns the paths of the swtpm control socket ('socket') and PID file
# ('pid') of VM $vmid as a hash ref.
sub get_tpm_paths {
    my ($vmid) = @_;

    my $base = "/var/run/qemu-server/$vmid.swtpm";

    return {
        socket => $base,
        pid => "$base.pid",
    };
}
3128
# Append the QEMU arguments for the emulated TPM to @$devices if the VM config
# has a 'tpmstate0' drive: a chardev connected to the swtpm socket, the TPM
# emulator backend and the tpm-tis device.
sub add_tpm_device {
    my ($vmid, $devices, $conf) = @_;

    return unless $conf->{tpmstate0};

    my $socket_path = get_tpm_paths($vmid)->{socket};

    push @$devices, (
        "-chardev", "socket,id=tpmchar,path=$socket_path",
        "-tpmdev", "emulator,id=tpmdev,chardev=tpmchar",
        "-device", "tpm-tis,tpmdev=tpmdev",
    );
}
3140
# Start the swtpm daemon that provides the emulated TPM of VM $vmid.
#
# $tpmdrive  - value of the 'tpmstate0' option; nothing is done if it is unset
# $migration - if true, the TPM state setup is skipped, as the state is
#              received from the remote side
#
# Unless migrating, swtpm_setup is run first; it is told not to overwrite
# existing state. Then swtpm is started as a daemon and we wait up to 5s for
# its PID file to appear.
#
# Returns the untainted PID of the swtpm daemon (undef if the PID file does not
# contain a number), or nothing if no TPM is configured. Dies if a command
# fails or the PID file is not created in time.
sub start_swtpm {
    my ($storecfg, $vmid, $tpmdrive, $migration) = @_;

    return if !$tpmdrive;

    my $state;
    my $tpm = parse_drive("tpmstate0", $tpmdrive);
    my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
    if ($storeid) {
        $state = PVE::Storage::map_volume($storecfg, $tpm->{file});
    } else {
        $state = $tpm->{file};
    }

    my $paths = get_tpm_paths($vmid);

    # an unset version is treated like any non-'v2.0' value, without triggering
    # an 'uninitialized' warning
    my $is_tpm2 = ($tpm->{version} // '') eq 'v2.0';

    # during migration, we will get state from remote
    #
    if (!$migration) {
        # run swtpm_setup to create a new TPM state if it doesn't exist yet
        my $setup_cmd = [
            "swtpm_setup",
            "--tpmstate",
            "file://$state",
            "--createek",
            "--create-ek-cert",
            "--create-platform-cert",
            "--lock-nvram",
            "--config",
            "/etc/swtpm_setup.conf", # do not use XDG configs
            "--runas",
            "0", # force creation as root, error if not possible
            "--not-overwrite", # ignore existing state, do not modify
        ];

        push @$setup_cmd, "--tpm2" if $is_tpm2;
        # TPM 2.0 supports ECC crypto, use if possible
        push @$setup_cmd, "--ecc" if $is_tpm2;

        run_command($setup_cmd, outfunc => sub {
            # use the line handed to the callback, not a capture variable
            # that happens to be set by the caller
            my ($line) = @_;
            print "swtpm_setup: $line\n";
        });
    }

    my $emulator_cmd = [
        "swtpm",
        "socket",
        "--tpmstate",
        "backend-uri=file://$state,mode=0600",
        "--ctrl",
        "type=unixio,path=$paths->{socket},mode=0600",
        "--pid",
        "file=$paths->{pid}",
        "--terminate", # terminate on QEMU disconnect
        "--daemon",
    ];
    push @$emulator_cmd, "--tpm2" if $is_tpm2;
    run_command($emulator_cmd, outfunc => sub {
        my ($line) = @_;
        print "$line\n";
    });

    my $tries = 100; # swtpm may take a bit to start before daemonizing, wait up to 5s for pid
    while (! -e $paths->{pid}) {
        die "failed to start swtpm: pid file '$paths->{pid}' wasn't created.\n" if --$tries == 0;
        usleep(50_000);
    }

    # return untainted PID of swtpm daemon so it can be killed on error;
    # capture into a lexical, so that a failed match yields undef instead of a
    # stale $1 from an earlier match
    my $first_line = file_read_firstline($paths->{pid}) // '';
    my ($pid) = $first_line =~ m/(\d+)/;
    return $pid;
}
3210
# Check whether a 'vga' property string selects a SPICE (qxl) display.
#
# Returns 0 if the type is not qxl, otherwise the digit following 'qxl' (qxl2..qxl4), or 1
# for plain 'qxl'.
sub vga_conf_has_spice {
    my ($vga) = @_;

    my $parsed = parse_vga($vga);
    my $type = $parsed->{type};

    if ($type && $type =~ m/^qxl([234])?$/) {
        return $1 || 1;
    }

    return 0;
}
3220
# True if the given architecture is the architecture of the host.
sub is_native($) {
    my ($arch) = @_;

    my $host_arch = get_host_arch();

    return $host_arch eq $arch;
}
3225
# Returns the architecture set in the VM config, falling back to the host architecture if
# the config does not define one.
sub get_vm_arch {
    my ($conf) = @_;

    my $configured = $conf->{arch};

    return defined($configured) ? $configured : get_host_arch();
}
3230
# QEMU machine type to fall back to when none is requested, keyed by guest architecture.
my $default_machines = {
    'aarch64' => 'virt',
    'x86_64' => 'pc',
};
3235
# Extract the 'major.minor' part of a QEMU version string.
#
# $kvmversion - version string to parse; if undefined, the result of kvm_user_version() is
#               used instead
#
# Returns the 'major.minor' prefix, or undef if the string does not start with one.
sub get_installed_machine_version {
    my ($kvmversion) = @_;

    $kvmversion = kvm_user_version() if !defined($kvmversion);

    # capture into a lexical: returning $1 after an unchecked match would hand out whatever
    # an earlier, unrelated regex captured if this match fails
    my ($version) = ($kvmversion // '') =~ m/^(\d+\.\d+)/;

    return $version;
}
3242
# Turn an unversioned machine type ('pc', 'q35', 'virt' or unset) into a version-pinned one.
#
# $machine      - machine type to pin; unset is handled like 'pc'
# $base_version - preferred version to pin to
# $kvmversion   - QEMU version, passed on to the helpers that check/derive the version
#
# The machine is pinned to $base_version if that is defined and can be run, otherwise to the
# installed machine version. Unknown machine types are returned unmodified, with a warning.
sub windows_get_pinned_machine_version {
    my ($machine, $base_version, $kvmversion) = @_;

    my $base_usable = defined($base_version)
        && PVE::QemuServer::Machine::can_run_pve_machine_version($base_version, $kvmversion);

    my $pin_version = $base_usable
        ? $base_version
        : get_installed_machine_version($kvmversion);

    return "pc-i440fx-$pin_version" if !$machine;

    my $pinned_prefix = {
        pc => 'pc-i440fx',
        q35 => 'pc-q35',
        virt => 'virt',
    };

    if (my $prefix = $pinned_prefix->{$machine}) {
        return "$prefix-$pin_version";
    }

    warn "unknown machine type '$machine', not touching that!\n";
    return $machine;
}
3264
# Determine the QEMU machine type for a VM.
#
# $conf            - VM config, 'machine' and 'ostype' are read
# $forcemachine    - if set, takes precedence over the configured machine
# $arch            - guest architecture, only used to pick the default machine ('x86_64' if
#                    undefined)
# $add_pve_version - if true, make sure the result carries a '+pveN' suffix
# $kvmversion      - QEMU version; queried via kvm_user_version() if needed and undefined
#
# Returns the machine type string.
sub get_vm_machine {
    my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_;

    my $machine = $forcemachine || $conf->{machine};

    my $is_unversioned = !$machine || $machine =~ m/^(?:pc|q35|virt)$/;
    if ($is_unversioned) {
        $kvmversion //= kvm_user_version();

        # we must pin Windows VMs without a specific version to 5.1, as 5.2 fixed a bug in ACPI
        # layout which confuses windows quite a bit and may result in various regressions..
        # see: https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg08484.html
        $machine = windows_get_pinned_machine_version($machine, '5.1', $kvmversion)
            if windows_version($conf->{ostype});

        $arch //= 'x86_64';
        $machine ||= $default_machines->{$arch};

        $machine .= "+pve" . PVE::QemuServer::Machine::get_pve_version($kvmversion)
            if $add_pve_version;
    }

    return $machine if !$add_pve_version;
    return $machine if $machine =~ m/\+pve\d+?(?:\.pxe)?$/;

    # for version-pinned machines that do not include a pve-version (e.g.
    # pc-q35-4.1), we assume 0 to keep them stable in case we bump
    if ($machine =~ m/^(.*?)\.pxe$/) {
        # the pve version goes in front of a trailing '.pxe'
        $machine = "$1+pve0.pxe";
    } else {
        $machine .= '+pve0';
    }

    return $machine;
}
3299
# Look up the OVMF firmware image files to use for a VM.
#
# $arch    - guest architecture, selects the set of known images
# $efidisk - parsed efidisk drive (may be undef); 'efitype' and 'pre-enrolled-keys' are read
# $smm     - whether the SMM variant should be used for '4m' type images
#
# Returns the list of files registered for the selected image type. Dies if no images are
# known for the architecture or for the selected type.
sub get_ovmf_files($$$) {
    my ($arch, $efidisk, $smm) = @_;

    my $types = $OVMF->{$arch}
        or die "no OVMF images known for architecture '$arch'\n";

    my $type = 'default';
    if (defined($efidisk->{efitype}) && $efidisk->{efitype} eq '4m') {
        $type = $smm ? "4m" : "4m-no-smm";
        $type .= '-ms' if $efidisk->{'pre-enrolled-keys'};
    }

    # fail with a telling message instead of dereferencing a missing entry
    my $files = $types->{$type}
        or die "no OVMF images of type '$type' known for architecture '$arch'\n";

    return $files->@*;
}
3314
# Emulator binaries used for running guests of a non-native architecture.
my $Arch2Qemu = {
    'x86_64' => '/usr/bin/qemu-system-x86_64',
    'aarch64' => '/usr/bin/qemu-system-aarch64',
};

# Returns the path of the binary used to run guests of the given architecture: the kvm
# binary for the host's own architecture, otherwise the matching emulator. Dies if no
# emulator is known for the architecture.
sub get_command_for_arch($) {
    my ($arch) = @_;

    if (!is_native($arch)) {
        my $emulator = $Arch2Qemu->{$arch};
        die "don't know how to emulate architecture '$arch'\n" if !$emulator;
        return $emulator;
    }

    return '/usr/bin/kvm';
}
3327
# The results of query_supported_cpu_flags and query_understood_cpu_flags can be used to
# get flags for a QEMU command line (-cpu element), but only after intersecting
# (array_intersect) the supported flags with the understood ones. This is required because:
#
# a) query_understood_ returns flags the host cannot use and
# b) query_supported_ (rather the QMP call) doesn't actually return CPU
#    flags, but CPU settings - with most of them being flags. Those settings
#    (and some flags, curiously) cannot be specified as a "-cpu" argument.
#
# query_supported_ has to start up to 2 temporary VMs, which makes it rather
# expensive. The value returned from this is available much cheaper from pmxcfs
# via PVE::Cluster::get_node_kv('cpuflags-$accel'), with $accel being 'kvm' or
# 'tcg'.
#
# pvestatd calls this function on startup and whenever the QEMU/KVM version
# changes, automatically populating pmxcfs.
#
# Returns: { kvm => [ flagX, flagY, ... ], tcg => [ flag1, flag2, ... ] }
# since kvm and tcg machines support different flags
#
sub query_supported_cpu_flags {
    my ($arch) = @_;

    $arch //= get_host_arch();

    # FIXME: Once this is merged, the code below should work for ARM as well:
    # https://lists.nongnu.org/archive/html/qemu-devel/2019-06/msg04947.html
    if ($arch eq "aarch64") {
        die "QEMU/KVM cannot detect CPU flags on ARM (aarch64)\n";
    }

    my $default_machine = $default_machines->{$arch};
    my $kvm_supported = defined(kvm_version());
    my $qemu_cmd = get_command_for_arch($arch);
    my $fakevmid = -1;
    my $pidfile = PVE::QemuServer::Helpers::pidfile_name($fakevmid);

    # Start a temporary (frozen) VM with vmid -1 to allow sending a QMP command
    my $query_supported_run_qemu = sub {
        my ($kvm) = @_;

        my @qemu_args = (
            $qemu_cmd,
            '-machine', $default_machine,
            '-display', 'none',
            '-chardev', "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server=on,wait=off",
            '-mon', 'chardev=qmp,mode=control',
            '-pidfile', $pidfile,
            '-S', '-daemonize',
        );
        push @qemu_args, '-accel', 'tcg' if !$kvm;

        my $rc = run_command(\@qemu_args, noerr => 1, quiet => 0);
        die "QEMU flag querying VM exited with code " . $rc if $rc;

        my %enabled;
        eval {
            my $expansion = mon_cmd(
                $fakevmid,
                'query-cpu-model-expansion',
                type => 'full',
                model => { name => 'host' }
            );

            my $props = $expansion->{model}->{props};
            for my $name (keys %$props) {
                next if $props->{$name} ne '1';
                # QEMU returns some flags multiple times, with '_', '.' or '-'
                # (e.g. lahf_lm and lahf-lm; sse4.2, sse4-2 and sse4_2; ...).
                # We only keep those with underscores, to match /proc/cpuinfo
                $name =~ s/\.|-/_/g;
                $enabled{$name} = 1;
            }
        };
        my $err = $@;

        # force stop with 10 sec timeout and 'nocheck', always stop, even if QMP failed
        vm_stop(undef, $fakevmid, 1, 1, 10, 0, 1);

        die $err if $err;

        return [ sort keys %enabled ];
    };

    my $flags = {};

    # We need to query QEMU twice, since KVM and TCG have different supported flags
    my $query_all = sub {
        $flags->{tcg} = eval { $query_supported_run_qemu->(0) };
        warn "warning: failed querying supported tcg flags: $@\n" if $@;

        return if !$kvm_supported;

        $flags->{kvm} = eval { $query_supported_run_qemu->(1) };
        warn "warning: failed querying supported kvm flags: $@\n" if $@;
    };
    PVE::QemuConfig->lock_config($fakevmid, $query_all);

    return $flags;
}
3429
# Understood CPU flags are written to a file at 'pve-qemu' compile time
my $understood_cpu_flag_dir = "/usr/share/kvm";

# Read the list of CPU flags recorded for the host architecture.
#
# Returns an array reference with the whitespace separated entries of the flag file; dies
# if there is no such file for the host architecture.
sub query_understood_cpu_flags {
    my $arch = get_host_arch();
    my $filepath = "$understood_cpu_flag_dir/recognized-CPUID-flags-$arch";

    unless (-e $filepath) {
        die "Cannot query understood QEMU CPU flags for architecture: $arch (file not found)\n";
    }

    my $raw = file_get_contents($filepath);

    # strip surrounding whitespace first, so that splitting yields no empty leading entry
    $raw =~ s/^\s+|\s+$//g;

    return [ split(/\s+/, $raw) ];
}
3445
# Since commit 277d33454f77ec1d1e0bc04e37621e4dd2424b67 in pve-qemu, smm is not off by default
# anymore. But smm=off seems to be required when using SeaBIOS and serial display.
#
# True if the VM uses SeaBIOS (explicitly, or by not configuring a bios at all) and the
# display type is a serial port or 'none'.
my sub should_disable_smm {
    my ($conf, $vga) = @_;

    my $bios = $conf->{bios};
    my $uses_seabios = !defined($bios) || $bios eq 'seabios';
    return $uses_seabios if !$uses_seabios;

    my $display = $vga->{type};
    return $display if !$display;

    return $display =~ m/^(serial\d+|none)$/;
}
3454
3455 sub config_to_command {
3456 my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu,
3457 $pbs_backing) = @_;
3458
3459 my $cmd = [];
3460 my ($globalFlags, $machineFlags, $rtcFlags) = ([], [], []);
3461 my $devices = [];
3462 my $bridges = {};
3463 my $ostype = $conf->{ostype};
3464 my $winversion = windows_version($ostype);
3465 my $kvm = $conf->{kvm};
3466 my $nodename = nodename();
3467
3468 my $arch = get_vm_arch($conf);
3469 my $kvm_binary = get_command_for_arch($arch);
3470 my $kvmver = kvm_user_version($kvm_binary);
3471
3472 if (!$kvmver || $kvmver !~ m/^(\d+)\.(\d+)/ || $1 < 3) {
3473 $kvmver //= "undefined";
3474 die "Detected old QEMU binary ('$kvmver', at least 3.0 is required)\n";
3475 }
3476
3477 my $add_pve_version = min_version($kvmver, 4, 1);
3478
3479 my $machine_type = get_vm_machine($conf, $forcemachine, $arch, $add_pve_version);
3480 my $machine_version = extract_version($machine_type, $kvmver);
3481 $kvm //= 1 if is_native($arch);
3482
3483 $machine_version =~ m/(\d+)\.(\d+)/;
3484 my ($machine_major, $machine_minor) = ($1, $2);
3485
3486 if ($kvmver =~ m/^\d+\.\d+\.(\d+)/ && $1 >= 90) {
3487 warn "warning: Installed QEMU version ($kvmver) is a release candidate, ignoring version checks\n";
3488 } elsif (!min_version($kvmver, $machine_major, $machine_minor)) {
3489 die "Installed QEMU version '$kvmver' is too old to run machine type '$machine_type',"
3490 ." please upgrade node '$nodename'\n"
3491 } elsif (!PVE::QemuServer::Machine::can_run_pve_machine_version($machine_version, $kvmver)) {
3492 my $max_pve_version = PVE::QemuServer::Machine::get_pve_version($machine_version);
3493 die "Installed qemu-server (max feature level for $machine_major.$machine_minor is"
3494 ." pve$max_pve_version) is too old to run machine type '$machine_type', please upgrade"
3495 ." node '$nodename'\n";
3496 }
3497
3498 # if a specific +pve version is required for a feature, use $version_guard
3499 # instead of min_version to allow machines to be run with the minimum
3500 # required version
3501 my $required_pve_version = 0;
3502 my $version_guard = sub {
3503 my ($major, $minor, $pve) = @_;
3504 return 0 if !min_version($machine_version, $major, $minor, $pve);
3505 my $max_pve = PVE::QemuServer::Machine::get_pve_version("$major.$minor");
3506 return 1 if min_version($machine_version, $major, $minor, $max_pve+1);
3507 $required_pve_version = $pve if $pve && $pve > $required_pve_version;
3508 return 1;
3509 };
3510
3511 if ($kvm && !defined kvm_version()) {
3512 die "KVM virtualisation configured, but not available. Either disable in VM configuration"
3513 ." or enable in BIOS.\n";
3514 }
3515
3516 my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
3517 my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
3518 my $use_old_bios_files = undef;
3519 ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);
3520
3521 push @$cmd, $kvm_binary;
3522
3523 push @$cmd, '-id', $vmid;
3524
3525 my $vmname = $conf->{name} || "vm$vmid";
3526
3527 push @$cmd, '-name', "$vmname,debug-threads=on";
3528
3529 push @$cmd, '-no-shutdown';
3530
3531 my $use_virtio = 0;
3532
3533 my $qmpsocket = PVE::QemuServer::Helpers::qmp_socket($vmid);
3534 push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server=on,wait=off";
3535 push @$cmd, '-mon', "chardev=qmp,mode=control";
3536
3537 if (min_version($machine_version, 2, 12)) {
3538 push @$cmd, '-chardev', "socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5";
3539 push @$cmd, '-mon', "chardev=qmp-event,mode=control";
3540 }
3541
3542 push @$cmd, '-pidfile' , PVE::QemuServer::Helpers::pidfile_name($vmid);
3543
3544 push @$cmd, '-daemonize';
3545
3546 if ($conf->{smbios1}) {
3547 my $smbios_conf = parse_smbios1($conf->{smbios1});
3548 if ($smbios_conf->{base64}) {
3549 # Do not pass base64 flag to qemu
3550 delete $smbios_conf->{base64};
3551 my $smbios_string = "";
3552 foreach my $key (keys %$smbios_conf) {
3553 my $value;
3554 if ($key eq "uuid") {
3555 $value = $smbios_conf->{uuid}
3556 } else {
3557 $value = decode_base64($smbios_conf->{$key});
3558 }
3559 # qemu accepts any binary data, only commas need escaping by double comma
3560 $value =~ s/,/,,/g;
3561 $smbios_string .= "," . $key . "=" . $value if $value;
3562 }
3563 push @$cmd, '-smbios', "type=1" . $smbios_string;
3564 } else {
3565 push @$cmd, '-smbios', "type=1,$conf->{smbios1}";
3566 }
3567 }
3568
3569 if ($conf->{bios} && $conf->{bios} eq 'ovmf') {
3570 my $d;
3571 if (my $efidisk = $conf->{efidisk0}) {
3572 $d = parse_drive('efidisk0', $efidisk);
3573 }
3574
3575 my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch, $d, $q35);
3576 die "uefi base image '$ovmf_code' not found\n" if ! -f $ovmf_code;
3577
3578 my ($path, $format);
3579 my $read_only_str = '';
3580 if ($d) {
3581 my ($storeid, $volname) = PVE::Storage::parse_volume_id($d->{file}, 1);
3582 $format = $d->{format};
3583 if ($storeid) {
3584 $path = PVE::Storage::path($storecfg, $d->{file});
3585 if (!defined($format)) {
3586 my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
3587 $format = qemu_img_format($scfg, $volname);
3588 }
3589 } else {
3590 $path = $d->{file};
3591 die "efidisk format must be specified\n"
3592 if !defined($format);
3593 }
3594
3595 $read_only_str = ',readonly=on' if drive_is_read_only($conf, $d);
3596 } else {
3597 log_warn("no efidisk configured! Using temporary efivars disk.");
3598 $path = "/tmp/$vmid-ovmf.fd";
3599 PVE::Tools::file_copy($ovmf_vars, $path, -s $ovmf_vars);
3600 $format = 'raw';
3601 }
3602
3603 my $size_str = "";
3604
3605 if ($format eq 'raw' && $version_guard->(4, 1, 2)) {
3606 $size_str = ",size=" . (-s $ovmf_vars);
3607 }
3608
3609 # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
3610 my $cache = "";
3611 if ($path =~ m/^rbd:/) {
3612 $cache = ',cache=writeback';
3613 $path .= ':rbd_cache_policy=writeback'; # avoid write-around, we *need* to cache writes too
3614 }
3615
3616 push @$cmd, '-drive', "if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code";
3617 push @$cmd, '-drive', "if=pflash,unit=1$cache,format=$format,id=drive-efidisk0$size_str,file=${path}${read_only_str}";
3618 }
3619
3620 if ($q35) { # tell QEMU to load q35 config early
3621 # we use different pcie-port hardware for qemu >= 4.0 for passthrough
3622 if (min_version($machine_version, 4, 0)) {
3623 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35-4.0.cfg';
3624 } else {
3625 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35.cfg';
3626 }
3627 }
3628
3629 if (defined(my $fixups = qemu_created_version_fixups($conf, $forcemachine, $kvmver))) {
3630 push @$cmd, $fixups->@*;
3631 }
3632
3633 if ($conf->{vmgenid}) {
3634 push @$devices, '-device', 'vmgenid,guid='.$conf->{vmgenid};
3635 }
3636
3637 # add usb controllers
3638 my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers(
3639 $conf, $bridges, $arch, $machine_type, $usbdesc->{format}, $MAX_USB_DEVICES);
3640 push @$devices, @usbcontrollers if @usbcontrollers;
3641 my $vga = parse_vga($conf->{vga});
3642
3643 my $qxlnum = vga_conf_has_spice($conf->{vga});
3644 $vga->{type} = 'qxl' if $qxlnum;
3645
3646 if (!$vga->{type}) {
3647 if ($arch eq 'aarch64') {
3648 $vga->{type} = 'virtio';
3649 } elsif (min_version($machine_version, 2, 9)) {
3650 $vga->{type} = (!$winversion || $winversion >= 6) ? 'std' : 'cirrus';
3651 } else {
3652 $vga->{type} = ($winversion >= 6) ? 'std' : 'cirrus';
3653 }
3654 }
3655
3656 # enable absolute mouse coordinates (needed by vnc)
3657 my $tablet = $conf->{tablet};
3658 if (!defined($tablet)) {
3659 $tablet = $defaults->{tablet};
3660 $tablet = 0 if $qxlnum; # disable for spice because it is not needed
3661 $tablet = 0 if $vga->{type} =~ m/^serial\d+$/; # disable if we use serial terminal (no vga card)
3662 }
3663
3664 if ($tablet) {
3665 push @$devices, '-device', print_tabletdevice_full($conf, $arch) if $tablet;
3666 my $kbd = print_keyboarddevice_full($conf, $arch);
3667 push @$devices, '-device', $kbd if defined($kbd);
3668 }
3669
3670 my $bootorder = device_bootorder($conf);
3671
3672 # host pci device passthrough
3673 my ($kvm_off, $gpu_passthrough, $legacy_igd) = PVE::QemuServer::PCI::print_hostpci_devices(
3674 $vmid, $conf, $devices, $vga, $winversion, $q35, $bridges, $arch, $machine_type, $bootorder);
3675
3676 # usb devices
3677 my $usb_dev_features = {};
3678 $usb_dev_features->{spice_usb3} = 1 if min_version($machine_version, 4, 0);
3679
3680 my @usbdevices = PVE::QemuServer::USB::get_usb_devices(
3681 $conf, $usbdesc->{format}, $MAX_USB_DEVICES, $usb_dev_features, $bootorder);
3682 push @$devices, @usbdevices if @usbdevices;
3683
3684 # serial devices
3685 for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
3686 my $path = $conf->{"serial$i"} or next;
3687 if ($path eq 'socket') {
3688 my $socket = "/var/run/qemu-server/${vmid}.serial$i";
3689 push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server=on,wait=off";
3690 # On aarch64, serial0 is the UART device. Qemu only allows
3691 # connecting UART devices via the '-serial' command line, as
3692 # the device has a fixed slot on the hardware...
3693 if ($arch eq 'aarch64' && $i == 0) {
3694 push @$devices, '-serial', "chardev:serial$i";
3695 } else {
3696 push @$devices, '-device', "isa-serial,chardev=serial$i";
3697 }
3698 } else {
3699 die "no such serial device\n" if ! -c $path;
3700 push @$devices, '-chardev', "tty,id=serial$i,path=$path";
3701 push @$devices, '-device', "isa-serial,chardev=serial$i";
3702 }
3703 }
3704
3705 # parallel devices
3706 for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
3707 if (my $path = $conf->{"parallel$i"}) {
3708 die "no such parallel device\n" if ! -c $path;
3709 my $devtype = $path =~ m!^/dev/usb/lp! ? 'tty' : 'parport';
3710 push @$devices, '-chardev', "$devtype,id=parallel$i,path=$path";
3711 push @$devices, '-device', "isa-parallel,chardev=parallel$i";
3712 }
3713 }
3714
3715 if (min_version($machine_version, 4, 0) && (my $audio = conf_has_audio($conf))) {
3716 my $audiopciaddr = print_pci_addr("audio0", $bridges, $arch, $machine_type);
3717 my $audio_devs = audio_devs($audio, $audiopciaddr, $machine_version);
3718 push @$devices, @$audio_devs;
3719 }
3720
3721 add_tpm_device($vmid, $devices, $conf);
3722
3723 my $sockets = 1;
3724 $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused
3725 $sockets = $conf->{sockets} if $conf->{sockets};
3726
3727 my $cores = $conf->{cores} || 1;
3728
3729 my $maxcpus = $sockets * $cores;
3730
3731 my $vcpus = $conf->{vcpus} ? $conf->{vcpus} : $maxcpus;
3732
3733 my $allowed_vcpus = $cpuinfo->{cpus};
3734
3735 die "MAX $allowed_vcpus vcpus allowed per VM on this node\n" if ($allowed_vcpus < $maxcpus);
3736
3737 if ($hotplug_features->{cpu} && min_version($machine_version, 2, 7)) {
3738 push @$cmd, '-smp', "1,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3739 for (my $i = 2; $i <= $vcpus; $i++) {
3740 my $cpustr = print_cpu_device($conf,$i);
3741 push @$cmd, '-device', $cpustr;
3742 }
3743
3744 } else {
3745
3746 push @$cmd, '-smp', "$vcpus,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3747 }
3748 push @$cmd, '-nodefaults';
3749
3750 push @$cmd, '-boot', "menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg";
3751
3752 push @$cmd, '-no-acpi' if defined($conf->{acpi}) && $conf->{acpi} == 0;
3753
3754 push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;
3755
3756 if ($vga->{type} && $vga->{type} !~ m/^serial\d+$/ && $vga->{type} ne 'none'){
3757 push @$devices, '-device', print_vga_device(
3758 $conf, $vga, $arch, $machine_version, $machine_type, undef, $qxlnum, $bridges);
3759
3760 push @$cmd, '-display', 'egl-headless,gl=core' if $vga->{type} eq 'virtio-gl'; # VIRGL
3761
3762 my $socket = PVE::QemuServer::Helpers::vnc_socket($vmid);
3763 push @$cmd, '-vnc', "unix:$socket,password=on";
3764 } else {
3765 push @$cmd, '-vga', 'none' if $vga->{type} eq 'none';
3766 push @$cmd, '-nographic';
3767 }
3768
3769 # time drift fix
3770 my $tdf = defined($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};
3771 my $useLocaltime = $conf->{localtime};
3772
3773 if ($winversion >= 5) { # windows
3774 $useLocaltime = 1 if !defined($conf->{localtime});
3775
3776 # use time drift fix when acpi is enabled
3777 if (!(defined($conf->{acpi}) && $conf->{acpi} == 0)) {
3778 $tdf = 1 if !defined($conf->{tdf});
3779 }
3780 }
3781
3782 if ($winversion >= 6) {
3783 push @$globalFlags, 'kvm-pit.lost_tick_policy=discard';
3784 push @$cmd, '-no-hpet';
3785 }
3786
3787 push @$rtcFlags, 'driftfix=slew' if $tdf;
3788
3789 if ($conf->{startdate} && $conf->{startdate} ne 'now') {
3790 push @$rtcFlags, "base=$conf->{startdate}";
3791 } elsif ($useLocaltime) {
3792 push @$rtcFlags, 'base=localtime';
3793 }
3794
3795 if ($forcecpu) {
3796 push @$cmd, '-cpu', $forcecpu;
3797 } else {
3798 push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
3799 }
3800
3801 PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd);
3802
3803 push @$cmd, '-S' if $conf->{freeze};
3804
3805 push @$cmd, '-k', $conf->{keyboard} if defined($conf->{keyboard});
3806
3807 my $guest_agent = parse_guest_agent($conf);
3808
3809 if ($guest_agent->{enabled}) {
3810 my $qgasocket = PVE::QemuServer::Helpers::qmp_socket($vmid, 1);
3811 push @$devices, '-chardev', "socket,path=$qgasocket,server=on,wait=off,id=qga0";
3812
3813 if (!$guest_agent->{type} || $guest_agent->{type} eq 'virtio') {
3814 my $pciaddr = print_pci_addr("qga0", $bridges, $arch, $machine_type);
3815 push @$devices, '-device', "virtio-serial,id=qga0$pciaddr";
3816 push @$devices, '-device', 'virtserialport,chardev=qga0,name=org.qemu.guest_agent.0';
3817 } elsif ($guest_agent->{type} eq 'isa') {
3818 push @$devices, '-device', "isa-serial,chardev=qga0";
3819 }
3820 }
3821
3822 my $rng = $conf->{rng0} ? parse_rng($conf->{rng0}) : undef;
3823 if ($rng && $version_guard->(4, 1, 2)) {
3824 check_rng_source($rng->{source});
3825
3826 my $max_bytes = $rng->{max_bytes} // $rng_fmt->{max_bytes}->{default};
3827 my $period = $rng->{period} // $rng_fmt->{period}->{default};
3828 my $limiter_str = "";
3829 if ($max_bytes) {
3830 $limiter_str = ",max-bytes=$max_bytes,period=$period";
3831 }
3832
3833 my $rng_addr = print_pci_addr("rng0", $bridges, $arch, $machine_type);
3834 push @$devices, '-object', "rng-random,filename=$rng->{source},id=rng0";
3835 push @$devices, '-device', "virtio-rng-pci,rng=rng0$limiter_str$rng_addr";
3836 }
3837
3838 my $spice_port;
3839
3840 if ($qxlnum || $vga->{type} =~ /^virtio/) {
3841 if ($qxlnum > 1) {
3842 if ($winversion){
3843 for (my $i = 1; $i < $qxlnum; $i++){
3844 push @$devices, '-device', print_vga_device(
3845 $conf, $vga, $arch, $machine_version, $machine_type, $i, $qxlnum, $bridges);
3846 }
3847 } else {
3848 # assume other OS works like Linux
3849 my ($ram, $vram) = ("134217728", "67108864");
3850 if ($vga->{memory}) {
3851 $ram = PVE::Tools::convert_size($qxlnum*4*$vga->{memory}, 'mb' => 'b');
3852 $vram = PVE::Tools::convert_size($qxlnum*2*$vga->{memory}, 'mb' => 'b');
3853 }
3854 push @$cmd, '-global', "qxl-vga.ram_size=$ram";
3855 push @$cmd, '-global', "qxl-vga.vram_size=$vram";
3856 }
3857 }
3858
3859 my $pciaddr = print_pci_addr("spice", $bridges, $arch, $machine_type);
3860
3861 my $pfamily = PVE::Tools::get_host_address_family($nodename);
3862 my @nodeaddrs = PVE::Tools::getaddrinfo_all('localhost', family => $pfamily);
3863 die "failed to get an ip address of type $pfamily for 'localhost'\n" if !@nodeaddrs;
3864
3865 push @$devices, '-device', "virtio-serial,id=spice$pciaddr";
3866 push @$devices, '-chardev', "spicevmc,id=vdagent,name=vdagent";
3867 push @$devices, '-device', "virtserialport,chardev=vdagent,name=com.redhat.spice.0";
3868
3869 my $localhost = PVE::Network::addr_to_ip($nodeaddrs[0]->{addr});
3870 $spice_port = PVE::Tools::next_spice_port($pfamily, $localhost);
3871
3872 my $spice_enhancement_str = $conf->{spice_enhancements} // '';
3873 my $spice_enhancement = parse_property_string($spice_enhancements_fmt, $spice_enhancement_str);
3874 if ($spice_enhancement->{foldersharing}) {
3875 push @$devices, '-chardev', "spiceport,id=foldershare,name=org.spice-space.webdav.0";
3876 push @$devices, '-device', "virtserialport,chardev=foldershare,name=org.spice-space.webdav.0";
3877 }
3878
3879 my $spice_opts = "tls-port=${spice_port},addr=$localhost,tls-ciphers=HIGH,seamless-migration=on";
3880 $spice_opts .= ",streaming-video=$spice_enhancement->{videostreaming}"
3881 if $spice_enhancement->{videostreaming};
3882
3883 push @$devices, '-spice', "$spice_opts";
3884 }
3885
3886 # enable balloon by default, unless explicitly disabled
3887 if (!defined($conf->{balloon}) || $conf->{balloon}) {
3888 my $pciaddr = print_pci_addr("balloon0", $bridges, $arch, $machine_type);
3889 my $ballooncmd = "virtio-balloon-pci,id=balloon0$pciaddr";
3890 $ballooncmd .= ",free-page-reporting=on" if min_version($machine_version, 6, 2);
3891 push @$devices, '-device', $ballooncmd;
3892 }
3893
3894 if ($conf->{watchdog}) {
3895 my $wdopts = parse_watchdog($conf->{watchdog});
3896 my $pciaddr = print_pci_addr("watchdog", $bridges, $arch, $machine_type);
3897 my $watchdog = $wdopts->{model} || 'i6300esb';
3898 push @$devices, '-device', "$watchdog$pciaddr";
3899 push @$devices, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
3900 }
3901
3902 my $vollist = [];
3903 my $scsicontroller = {};
3904 my $ahcicontroller = {};
3905 my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : $defaults->{scsihw};
3906
3907 # Add iscsi initiator name if available
3908 if (my $initiator = get_initiator_name()) {
3909 push @$devices, '-iscsi', "initiator-name=$initiator";
3910 }
3911
3912 PVE::QemuConfig->foreach_volume($conf, sub {
3913 my ($ds, $drive) = @_;
3914
3915 if (PVE::Storage::parse_volume_id($drive->{file}, 1)) {
3916 check_volume_storage_type($storecfg, $drive->{file});
3917 push @$vollist, $drive->{file};
3918 }
3919
3920 # ignore efidisk here, already added in bios/fw handling code above
3921 return if $drive->{interface} eq 'efidisk';
3922 # similar for TPM
3923 return if $drive->{interface} eq 'tpmstate';
3924
3925 $use_virtio = 1 if $ds =~ m/^virtio/;
3926
3927 $drive->{bootindex} = $bootorder->{$ds} if $bootorder->{$ds};
3928
3929 if ($drive->{interface} eq 'virtio'){
3930 push @$cmd, '-object', "iothread,id=iothread-$ds" if $drive->{iothread};
3931 }
3932
3933 if ($drive->{interface} eq 'scsi') {
3934
3935 my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
3936
3937 die "scsi$drive->{index}: machine version 4.1~pve2 or higher is required to use more than 14 SCSI disks\n"
3938 if $drive->{index} > 13 && !&$version_guard(4, 1, 2);
3939
3940 my $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges, $arch, $machine_type);
3941 my $scsihw_type = $scsihw =~ m/^virtio-scsi-single/ ? "virtio-scsi-pci" : $scsihw;
3942
3943 my $iothread = '';
3944 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{iothread}){
3945 $iothread .= ",iothread=iothread-$controller_prefix$controller";
3946 push @$cmd, '-object', "iothread,id=iothread-$controller_prefix$controller";
3947 } elsif ($drive->{iothread}) {
3948 warn "iothread is only valid with virtio disk or virtio-scsi-single controller, ignoring\n";
3949 }
3950
3951 my $queues = '';
3952 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{queues}){
3953 $queues = ",num_queues=$drive->{queues}";
3954 }
3955
3956 push @$devices, '-device', "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues"
3957 if !$scsicontroller->{$controller};
3958 $scsicontroller->{$controller}=1;
3959 }
3960
3961 if ($drive->{interface} eq 'sata') {
3962 my $controller = int($drive->{index} / $PVE::QemuServer::Drive::MAX_SATA_DISKS);
3963 my $pciaddr = print_pci_addr("ahci$controller", $bridges, $arch, $machine_type);
3964 push @$devices, '-device', "ahci,id=ahci$controller,multifunction=on$pciaddr"
3965 if !$ahcicontroller->{$controller};
3966 $ahcicontroller->{$controller}=1;
3967 }
3968
3969 my $pbs_conf = $pbs_backing->{$ds};
3970 my $pbs_name = undef;
3971 if ($pbs_conf) {
3972 $pbs_name = "drive-$ds-pbs";
3973 push @$devices, '-blockdev', print_pbs_blockdev($pbs_conf, $pbs_name);
3974 }
3975
3976 my $drive_cmd = print_drive_commandline_full(
3977 $storecfg, $vmid, $drive, $pbs_name, min_version($kvmver, 6, 0));
3978
3979 # extra protection for templates, but SATA and IDE don't support it..
3980 $drive_cmd .= ',readonly=on' if drive_is_read_only($conf, $drive);
3981
3982 push @$devices, '-drive',$drive_cmd;
3983 push @$devices, '-device', print_drivedevice_full(
3984 $storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type);
3985 });
3986
3987 for (my $i = 0; $i < $MAX_NETS; $i++) {
3988 my $netname = "net$i";
3989
3990 next if !$conf->{$netname};
3991 my $d = parse_net($conf->{$netname});
3992 next if !$d;
3993
3994 $use_virtio = 1 if $d->{model} eq 'virtio';
3995
3996 $d->{bootindex} = $bootorder->{$netname} if $bootorder->{$netname};
3997
3998 my $netdevfull = print_netdev_full($vmid, $conf, $arch, $d, $netname);
3999 push @$devices, '-netdev', $netdevfull;
4000
4001 my $netdevicefull = print_netdevice_full(
4002 $vmid, $conf, $d, $netname, $bridges, $use_old_bios_files, $arch, $machine_type);
4003
4004 push @$devices, '-device', $netdevicefull;
4005 }
4006
4007 if ($conf->{ivshmem}) {
4008 my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
4009
4010 my $bus;
4011 if ($q35) {
4012 $bus = print_pcie_addr("ivshmem");
4013 } else {
4014 $bus = print_pci_addr("ivshmem", $bridges, $arch, $machine_type);
4015 }
4016
4017 my $ivshmem_name = $ivshmem->{name} // $vmid;
4018 my $path = '/dev/shm/pve-shm-' . $ivshmem_name;
4019
4020 push @$devices, '-device', "ivshmem-plain,memdev=ivshmem$bus,";
4021 push @$devices, '-object', "memory-backend-file,id=ivshmem,share=on,mem-path=$path"
4022 .",size=$ivshmem->{size}M";
4023 }
4024
4025 # pci.4 is nested in pci.1
4026 $bridges->{1} = 1 if $bridges->{4};
4027
4028 if (!$q35) { # add pci bridges
4029 if (min_version($machine_version, 2, 3)) {
4030 $bridges->{1} = 1;
4031 $bridges->{2} = 1;
4032 }
4033 $bridges->{3} = 1 if $scsihw =~ m/^virtio-scsi-single/;
4034 }
4035
4036 for my $k (sort {$b cmp $a} keys %$bridges) {
4037 next if $q35 && $k < 4; # q35.cfg already includes bridges up to 3
4038
4039 my $k_name = $k;
4040 if ($k == 2 && $legacy_igd) {
4041 $k_name = "$k-igd";
4042 }
4043 my $pciaddr = print_pci_addr("pci.$k_name", undef, $arch, $machine_type);
4044 my $devstr = "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr";
4045
4046 if ($q35) { # add after -readconfig pve-q35.cfg
4047 splice @$devices, 2, 0, '-device', $devstr;
4048 } else {
4049 unshift @$devices, '-device', $devstr if $k > 0;
4050 }
4051 }
4052
4053 if (!$kvm) {
4054 push @$machineFlags, 'accel=tcg';
4055 }
4056
4057 push @$machineFlags, 'smm=off' if should_disable_smm($conf, $vga);
4058
4059 my $machine_type_min = $machine_type;
4060 if ($add_pve_version) {
4061 $machine_type_min =~ s/\+pve\d+$//;
4062 $machine_type_min .= "+pve$required_pve_version";
4063 }
4064 push @$machineFlags, "type=${machine_type_min}";
4065
4066 push @$cmd, @$devices;
4067 push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
4068 push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
4069 push @$cmd, '-global', join(',', @$globalFlags) if scalar(@$globalFlags);
4070
4071 if (my $vmstate = $conf->{vmstate}) {
4072 my $statepath = PVE::Storage::path($storecfg, $vmstate);
4073 push @$vollist, $vmstate;
4074 push @$cmd, '-loadstate', $statepath;
4075 print "activating and using '$vmstate' as vmstate\n";
4076 }
4077
4078 if (PVE::QemuConfig->is_template($conf)) {
4079 # needed to workaround base volumes being read-only
4080 push @$cmd, '-snapshot';
4081 }
4082
4083 # add custom args
4084 if ($conf->{args}) {
4085 my $aa = PVE::Tools::split_args($conf->{args});
4086 push @$cmd, @$aa;
4087 }
4088
4089 return wantarray ? ($cmd, $vollist, $spice_port) : $cmd;
4090 }
4091
# Validate the host-side source file of a VirtIO RNG device.
#
# Dies if $source does not exist, or if $source is '/dev/hwrng' while the
# kernel's 'rng_current' sysfs entry reads 'none'.
sub check_rng_source {
    my ($source) = @_;

    # mostly relevant for /dev/hwrng, but doesn't hurt to check others too
    -e $source
        or die "cannot create VirtIO RNG device: source file '$source' doesn't exist\n";

    my $rng_current = '/sys/devices/virtual/misc/hw_random/rng_current';
    my $uses_hwrng = $source eq '/dev/hwrng';

    if ($uses_hwrng && file_read_firstline($rng_current) eq 'none') {
        # Needs to abort, otherwise QEMU crashes on first rng access. Note that rng_current
        # cannot be changed to 'none' manually, so once the VM is past this point, it's no
        # longer an issue.
        die "Cannot start VM with passed-through RNG device: '/dev/hwrng' exists, but"
            ." '$rng_current' is set to 'none'. Ensure that a compatible hardware-RNG is attached"
            ." to the host.\n";
    }
}
4108
# Return the SPICE port reported by 'query-spice' for VM $vmid, preferring
# the TLS port over the plain one. Dies if neither is set.
sub spice_port {
    my ($vmid) = @_;

    my $spice_info = mon_cmd($vmid, 'query-spice');

    for my $key ('tls-port', 'port') {
        return $spice_info->{$key} if $spice_info->{$key};
    }

    die "no spice port\n";
}
4116
# Build a hash of the device IDs currently present in the running VM $vmid.
#
# Sources, in order: PCI devices from 'query-pci' (descending into bridges),
# block devices named 'drive-<id>' from 'query-block', the HID tablet from
# 'query-mice' and 'usb<N>' entries below /machine/peripheral. For a PCI
# bridge the stored value is 1 plus the number of devices directly behind it;
# every other entry has the value 1.
sub vm_devices_list {
    my ($vmid) = @_;

    my $devices = {};

    # walk the PCI topology level by level, starting with the devices on each bus
    my $pci_buses = mon_cmd($vmid, 'query-pci');
    my @current_level = map { @{$_->{devices}} } @$pci_buses;

    while (@current_level) {
        my @next_level;
        for my $pci_dev (@current_level) {
            my $id = $pci_dev->{'qdev_id'};
            $devices->{$id} = 1 if $id;

            my $bridge = $pci_dev->{'pci_bridge'};
            next if !$bridge;

            $devices->{$id} += scalar(@{$bridge->{devices}});
            push @next_level, @{$bridge->{devices}};
        }
        @current_level = @next_level;
    }

    my $block_devs = mon_cmd($vmid, 'query-block');
    for my $block_dev (@$block_devs) {
        $devices->{$1} = 1 if $block_dev->{device} =~ m/^drive-(\S+)/;
    }

    my $mice = mon_cmd($vmid, 'query-mice');
    for my $mouse (@$mice) {
        next if $mouse->{name} ne 'QEMU HID Tablet';
        $devices->{tablet} = 1;
        last;
    }

    # for usb devices there is no query-usb
    # but we can iterate over the entries in
    # qom-list path=/machine/peripheral
    my $peripherals = mon_cmd($vmid, 'qom-list', path => '/machine/peripheral');
    for my $peripheral (@$peripherals) {
        my $name = $peripheral->{name};
        $devices->{$name} = 1 if $name =~ m/^usb\d+$/;
    }

    return $devices;
}
4166
# Hot-plug the device $deviceid into the running VM $vmid.
#
# Returns 1 immediately when vm_devices_list() already reports $deviceid.
# Otherwise qemu_add_pci_bridge() is called for the device and the device is
# plugged according to the kind encoded in its ID; $device is the parsed
# drive/net configuration for the kinds that need one. Returns 1 on success
# and dies for IDs that cannot be hot-plugged (including usb<N>). For net<N>
# the sub returns empty when qemu_netdevadd() reports failure.
sub vm_deviceplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    my $plugged = vm_devices_list($vmid);
    return 1 if defined($plugged->{$deviceid});

    # add PCI bridge if we need it for the device
    qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid, $arch, $machine_type);

    # run $cleanup on a best-effort basis (failures only warn), then re-raise $err
    my $cleanup_and_die = sub {
        my ($err, $cleanup) = @_;
        eval { $cleanup->() };
        warn $@ if $@;
        die $err;
    };

    if ($deviceid eq 'tablet') {
        qemu_deviceadd($vmid, print_tabletdevice_full($conf, $arch));
    } elsif ($deviceid eq 'keyboard') {
        qemu_deviceadd($vmid, print_keyboarddevice_full($conf, $arch));
    } elsif ($deviceid =~ m/^usb(\d+)$/) {
        # since we can't reliably hot unplug all added usb devices and usb
        # passthrough breaks live migration we disable usb hotplugging for now
        die "usb hotplug currently not reliable\n";
        #qemu_deviceadd($vmid, PVE::QemuServer::USB::print_usbdevice_full($conf, $deviceid, $device));
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        qemu_iothread_add($vmid, $deviceid, $device);
        qemu_driveadd($storecfg, $vmid, $device);

        my $devicefull = print_drivedevice_full(
            $storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);
        qemu_deviceadd($vmid, $devicefull);

        # drop the already added drive again if the device never shows up
        eval { qemu_deviceaddverify($vmid, $deviceid) };
        $cleanup_and_die->($@, sub { qemu_drivedel($vmid, $deviceid) }) if $@;
    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        my $scsihw = $conf->{scsihw} // "lsi";
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $scsihw_type = $scsihw eq 'virtio-scsi-single' ? "virtio-scsi-pci" : $scsihw;

        my $devicefull = "$scsihw_type,id=$deviceid$pciaddr";

        # iothread and queue count are only applied to virtioscsi<N> controllers
        if ($deviceid =~ m/^virtioscsi(\d+)$/) {
            if ($device->{iothread}) {
                qemu_iothread_add($vmid, $deviceid, $device);
                $devicefull .= ",iothread=iothread-$deviceid";
            }
            $devicefull .= ",num_queues=$device->{queues}" if $device->{queues};
        }

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        qemu_findorcreatescsihw($storecfg, $conf, $vmid, $device, $arch, $machine_type);
        qemu_driveadd($storecfg, $vmid, $device);

        my $devicefull = print_drivedevice_full(
            $storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);

        # drop the already added drive again if adding the device fails
        eval { qemu_deviceadd($vmid, $devicefull) };
        $cleanup_and_die->($@, sub { qemu_drivedel($vmid, $deviceid) }) if $@;
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        return if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid);

        # the machine type used for the NIC is derived from the running VM,
        # not taken from the $machine_type parameter
        my $pxe_machine_type = PVE::QemuServer::Machine::qemu_machine_pxe($vmid, $conf);
        my ($use_old_bios_files, $net_machine_type) = qemu_use_old_bios_files($pxe_machine_type);

        my $netdevicefull = print_netdevice_full(
            $vmid, $conf, $device, $deviceid, undef, $use_old_bios_files, $arch, $net_machine_type);
        qemu_deviceadd($vmid, $netdevicefull);

        # remove the netdev backend again if the NIC cannot be verified/configured
        eval {
            qemu_deviceaddverify($vmid, $deviceid);
            qemu_set_link_status($vmid, $deviceid, !$device->{link_down});
        };
        $cleanup_and_die->($@, sub { qemu_netdevdel($vmid, $deviceid) }) if $@;
    } elsif (!$q35 && $deviceid =~ m/^(pci\.)(\d+)$/) {
        my $bridgeid = $2;
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);

        qemu_deviceadd($vmid, "pci-bridge,id=pci.$bridgeid,chassis_nr=$bridgeid$pciaddr");
        qemu_deviceaddverify($vmid, $deviceid);
    } else {
        die "can't hotplug device '$deviceid'\n";
    }

    return 1;
}
4261
# Hot-unplug the device $deviceid from the running VM $vmid.
#
# Returns 1 immediately when vm_devices_list() does not report $deviceid.
# Dies when $deviceid is one of the VM's boot disks, for usb<N> devices and
# for IDs that cannot be unplugged. Returns 1 on success.
#
# fixme: this should raise exceptions on error!
sub vm_deviceunplug {
    my ($vmid, $conf, $deviceid) = @_;

    my $plugged = vm_devices_list($vmid);
    return 1 if !defined($plugged->{$deviceid});

    my $bootdisks = PVE::QemuServer::Drive::get_bootdisks($conf);
    if (grep { $_ eq $deviceid } @$bootdisks) {
        die "can't unplug bootdisk '$deviceid'\n";
    }

    # device_del followed by the check that the device is really gone
    my $remove_verified = sub {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
    };

    if ($deviceid eq 'tablet' || $deviceid eq 'keyboard') {
        qemu_devicedel($vmid, $deviceid);
    } elsif ($deviceid =~ m/^usb\d+$/) {
        # when unplugging usb devices this way, there may be remaining usb
        # controllers/hubs so we disable it for now
        die "usb hotplug currently not reliable\n";
        #qemu_devicedel($vmid, $deviceid);
        #qemu_devicedelverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        $remove_verified->();
        qemu_drivedel($vmid, $deviceid);
        qemu_iothread_del($vmid, $deviceid, $drive);
    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        $remove_verified->();
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        $remove_verified->();
        qemu_drivedel($vmid, $deviceid);
        qemu_deletescsihw($conf, $vmid, $deviceid);

        # with virtio-scsi-single the iothread is named after the per-disk controller
        if ($conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single')) {
            qemu_iothread_del($vmid, "virtioscsi$drive->{index}", $drive);
        }
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        $remove_verified->();
        qemu_netdevdel($vmid, $deviceid);
    } else {
        die "can't unplug device '$deviceid'\n";
    }

    return 1;
}
4310
# Issue a 'device_add' for the '-device' style string $devicefull.
#
# The string starts with the bare driver name, so 'driver=' is prepended and
# the result is split on '=' and ',' into the key/value arguments of the command.
sub qemu_deviceadd {
    my ($vmid, $devicefull) = @_;

    my %options = split(/[=,]/, "driver=" . $devicefull);

    return mon_cmd($vmid, "device_add", %options);
}
4319
# Issue a 'device_del' for the device with ID $deviceid and return the
# monitor command's result.
sub qemu_devicedel {
    my ($vmid, $deviceid) = @_;

    my $result = mon_cmd($vmid, "device_del", id => $deviceid);

    return $result;
}
4325
# Create the 'iothread-$deviceid' object for a device that has the iothread
# option set, unless vm_iothreads_list() already reports it.
sub qemu_iothread_add {
    my ($vmid, $deviceid, $device) = @_;

    if ($device->{iothread}) {
        my $object_id = "iothread-$deviceid";
        my $existing = vm_iothreads_list($vmid);
        qemu_objectadd($vmid, $object_id, "iothread") if !$existing->{$object_id};
    }
}
4334
# Remove the 'iothread-$deviceid' object of a device that has the iothread
# option set, if vm_iothreads_list() reports it.
sub qemu_iothread_del {
    my ($vmid, $deviceid, $device) = @_;

    if ($device->{iothread}) {
        my $object_id = "iothread-$deviceid";
        my $existing = vm_iothreads_list($vmid);
        qemu_objectdel($vmid, $object_id) if $existing->{$object_id};
    }
}
4343
# Issue an 'object-add' creating object $objectid of QOM type $qomtype.
# Always returns 1 (errors surface as exceptions from mon_cmd, if it raises any).
sub qemu_objectadd {
    my ($vmid, $objectid, $qomtype) = @_;

    my %arguments = (
        id => $objectid,
        "qom-type" => $qomtype,
    );
    mon_cmd($vmid, "object-add", %arguments);

    return 1;
}
4351
# Issue an 'object-del' for the object with ID $objectid. Always returns 1.
sub qemu_objectdel {
    my ($vmid, $objectid) = @_;

    my %arguments = (id => $objectid);
    mon_cmd($vmid, "object-del", %arguments);

    return 1;
}
4359
# Add the drive described by $device to the running VM via the HMP command
# 'drive_add'. Returns 1 when the reply contains "OK", dies otherwise.
sub qemu_driveadd {
    my ($storecfg, $vmid, $device) = @_;

    # the last argument of print_drive_commandline_full depends on the running
    # QEMU being at least version 6.0
    my $running_version = get_running_qemu_version($vmid);
    my $io_uring = min_version($running_version, 6, 0);

    my $drive = print_drive_commandline_full($storecfg, $vmid, $device, undef, $io_uring);
    # double every backslash, as the string is embedded in a quoted HMP argument
    $drive =~ s/\\/\\\\/g;

    my $command = "drive_add auto \"$drive\"";
    my $reply = PVE::QemuServer::Monitor::hmp_cmd($vmid, $command);

    # If the command succeeds qemu prints: "OK"
    die "adding drive failed: $reply\n" if $reply !~ m/OK/s;

    return 1;
}
4374
# Remove the drive 'drive-$deviceid' from the running VM via the HMP command
# 'drive_del'. Returns 1 on success, dies with the monitor reply otherwise.
sub qemu_drivedel {
    my ($vmid, $deviceid) = @_;

    my $reply = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_del drive-$deviceid");
    $reply =~ s/^\s+//;

    # an empty reply means success
    my $deleted = $reply eq "";
    # NB: device not found errors mean the drive was auto-deleted and we ignore the error
    my $already_gone = $reply =~ m/Device \'.*?\' not found/s;

    return 1 if $deleted || $already_gone;

    die "deleting drive $deviceid failed : $reply\n";
}
4388
# Wait until $deviceid shows up in vm_devices_list(), checking up to six
# times with a one second pause after each failed check. Returns 1 once the
# device is present, dies otherwise.
sub qemu_deviceaddverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $plugged = vm_devices_list($vmid);
        return 1 if defined($plugged->{$deviceid});
        sleep 1;
    }

    die "error on hotplug device '$deviceid'\n";
}
4400
4401
# Wait until $deviceid has disappeared from vm_devices_list(), checking up to
# six times with a one second pause after each failed check. Returns 1 once
# the device is gone, dies otherwise.
sub qemu_devicedelverify {
    my ($vmid, $deviceid) = @_;

    # need to verify that the device is correctly removed as device_del
    # is async and empty return is not reliable
    for my $attempt (0 .. 5) {
        my $plugged = vm_devices_list($vmid);
        return 1 if !defined($plugged->{$deviceid});
        sleep 1;
    }

    die "error on hot-unplugging device '$deviceid'\n";
}
4416
# Make sure the SCSI controller that scsihw_infos() assigns to the drive
# $device is present in the running VM, hot-plugging it when it is missing.
# Always returns 1.
sub qemu_findorcreatescsihw {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my (undef, $controller, $controller_prefix) = scsihw_infos($conf, $device);
    my $scsihwid = "$controller_prefix$controller";

    my $plugged = vm_devices_list($vmid);
    vm_deviceplug($storecfg, $conf, $vmid, $scsihwid, $device, $arch, $machine_type)
        if !defined($plugged->{$scsihwid});

    return 1;
}
4431
# Unplug the SCSI controller belonging to the drive option $opt, if possible.
#
# With 'virtio-scsi-single' the drive's own 'virtioscsi<index>' controller is
# unplugged. Otherwise 'scsihw<controller>' is only unplugged when no plugged
# SCSI drive passes the index check below. Always returns 1.
sub qemu_deletescsihw {
    my ($conf, $vmid, $opt) = @_;

    my $device = parse_drive($opt, $conf->{$opt});

    my $scsihw = $conf->{scsihw};
    if ($scsihw && ($scsihw eq 'virtio-scsi-single')) {
        vm_deviceunplug($vmid, $conf, "virtioscsi$device->{index}");
        return 1;
    }

    my ($maxdev, $controller) = scsihw_infos($conf, $device);
    my $index_limit = ($maxdev - 1) * ($controller + 1);

    # keep the controller while a plugged SCSI drive with an index below the limit exists
    my $plugged = vm_devices_list($vmid);
    for my $name (keys %{$plugged}) {
        next if !is_valid_drivename($name);

        my $drive = parse_drive($name, $conf->{$name});
        return 1 if $drive->{interface} eq 'scsi' && $drive->{index} < $index_limit;
    }

    vm_deviceunplug($vmid, $conf, "scsihw$controller");

    return 1;
}
4460
# Hot-plug the PCI bridge needed by $device (a device ID), if there is one
# and it is not plugged yet.
#
# print_pci_addr() is called with an empty $bridges hash; its return value is
# discarded and only the bridge IDs it records in that hash are used. Nothing
# is plugged when no bridge was recorded or the bridge ID is below 1.
# Always returns 1.
sub qemu_add_pci_bridge {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my $bridges = {};
    print_pci_addr($device, $bridges, $arch, $machine_type);

    # use the last key in hash iteration order, as before
    my $bridgeid;
    for my $id (keys %$bridges) {
        $bridgeid = $id;
    }
    return 1 if !defined($bridgeid) || $bridgeid < 1;

    my $bridge = "pci.$bridgeid";
    my $devices_list = vm_devices_list($vmid);

    if (!defined($devices_list->{$bridge})) {
        # vm_deviceplug takes ($storecfg, $conf, $vmid, $deviceid, $device, $arch,
        # $machine_type). A bridge has no parsed device configuration, so undef must
        # be passed explicitly for $device - otherwise $arch and $machine_type end up
        # one position too early in the callee.
        vm_deviceplug($storecfg, $conf, $vmid, $bridge, undef, $arch, $machine_type);
    }

    return 1;
}
4484
# Set the link state of network device $device via 'set_link': up when $up
# is true, down otherwise.
sub qemu_set_link_status {
    my ($vmid, $device, $up) = @_;

    my $link_state = $up ? JSON::true : JSON::false;

    return mon_cmd($vmid, "set_link", name => $device, up => $link_state);
}
4491
# Add the network backend for $deviceid to the running VM via 'netdev_add'.
#
# The string from print_netdev_full() is split on '=' and ',' into the
# command's key/value arguments; 'vhost' is converted to a JSON boolean and
# 'queues' to a number before sending. Always returns 1.
sub qemu_netdevadd {
    my ($vmid, $conf, $arch, $device, $deviceid) = @_;

    my $netdev = print_netdev_full($vmid, $conf, $arch, $device, $deviceid, 1);
    my %options = split(/[=,]/, $netdev);

    # everything is a string after the split, so fix up the typed arguments
    $options{vhost} = JSON::boolean(PVE::JSONSchema::parse_boolean($options{vhost}))
        if defined($options{vhost});
    $options{queues} = $options{queues} + 0
        if defined($options{queues});

    mon_cmd($vmid, "netdev_add", %options);
    return 1;
}
4509
# Remove the network backend with ID $deviceid via 'netdev_del'.
sub qemu_netdevdel {
    my ($vmid, $deviceid) = @_;

    my %arguments = (id => $deviceid);

    return mon_cmd($vmid, "netdev_del", %arguments);
}
4515
# Replace the USB device $deviceid of the running VM: unplug the old one,
# add an xhci controller when a usb3 device needs one and none is present,
# then plug the device parsed from $device->{host}. Does nothing when
# $device is false.
sub qemu_usb_hotplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    return if !$device;

    # remove the old one first
    vm_deviceunplug($vmid, $conf, $deviceid);

    # check if xhci controller is necessary and available
    if ($device->{usb3} && !vm_devices_list($vmid)->{xhci}) {
        my $pciaddr = print_pci_addr("xhci", undef, $arch, $machine_type);
        qemu_deviceadd($vmid, "nec-usb-xhci,id=xhci$pciaddr");
    }

    my $usb_device = parse_usb_device($device->{host});
    $usb_device->{usb3} = $device->{usb3};

    # add the new one
    return vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $usb_device, $arch, $machine_type);
}
4540
# Change the number of vCPUs of the running VM $vmid to $vcpus (or to the
# maximum of sockets * cores when $vcpus is false).
#
# Dies when more vCPUs than the maximum are requested, when unplugging is
# requested on a machine version below 2.7, or when the number of running
# vCPUs does not match the configuration before plugging. With machine
# version 2.7 or higher, $conf->{vcpus} is updated and the configuration
# written after every single plug/unplug step.
sub qemu_cpu_hotplug {
    my ($vmid, $conf, $vcpus) = @_;

    my $machine_type = PVE::QemuServer::Machine::get_current_qemu_machine($vmid);

    # 'smp' is the old style socket count (no longer used); 'sockets' wins if both are set
    my $sockets = $conf->{sockets} || $conf->{smp} || 1;
    my $cores = $conf->{cores} || 1;
    my $maxcpus = $sockets * $cores;

    $vcpus ||= $maxcpus;

    die "you can't add more vcpus than maxcpus\n" if $vcpus > $maxcpus;

    my $currentvcpus = $conf->{vcpus} || $maxcpus;

    # Poll 'query-cpus-fast' once per second until it lists $expected vCPUs and
    # return that count. Raises a parameter exception with $errmsg for 'vcpus'
    # once the retry counter exceeds $max_retries.
    my $wait_for_vcpu_count = sub {
        my ($expected, $max_retries, $errmsg) = @_;

        my $retry = 0;
        while (1) {
            my $running_cpus = mon_cmd($vmid, "query-cpus-fast");
            my $count = scalar(@{$running_cpus});
            return $count if $count == $expected;

            raise_param_exc({ vcpus => $errmsg }) if $retry > $max_retries;
            $retry++;
            sleep 1;
        }
    };

    if ($vcpus < $currentvcpus) {
        die "cpu hot-unplugging requires qemu version 2.7 or higher\n"
            if !PVE::QemuServer::Machine::machine_version($machine_type, 2, 7);

        # unplug one vCPU at a time, highest ID first
        for (my $cpu = $currentvcpus; $cpu > $vcpus; $cpu--) {
            qemu_devicedel($vmid, "cpu$cpu");

            # update conf after each successful cpu unplug
            $conf->{vcpus} = $wait_for_vcpu_count->($cpu - 1, 5, "error unplugging cpu$cpu");
            PVE::QemuConfig->write_config($vmid, $conf);
        }

        return;
    }

    my $running_before = mon_cmd($vmid, "query-cpus-fast");
    die "vcpus in running vm does not match its configuration\n"
        if scalar(@{$running_before}) != $currentvcpus;

    if (PVE::QemuServer::Machine::machine_version($machine_type, 2, 7)) {
        for (my $cpu = $currentvcpus + 1; $cpu <= $vcpus; $cpu++) {
            qemu_deviceadd($vmid, print_cpu_device($conf, $cpu));

            # update conf after each successful cpu hotplug
            $conf->{vcpus} = $wait_for_vcpu_count->($cpu, 10, "error hotplugging cpu$cpu");
            PVE::QemuConfig->write_config($vmid, $conf);
        }
    } else {
        # older machine versions use the 'cpu-add' command with 0-based IDs
        for (my $cpu = $currentvcpus; $cpu < $vcpus; $cpu++) {
            mon_cmd($vmid, "cpu-add", id => int($cpu));
        }
    }
}
4615
# Apply I/O throttling limits to block device $deviceid of the running VM
# via 'block_set_io_throttle'. Does nothing when the VM is not running.
#
# After $vmid and $deviceid, the limits are passed positionally in this order:
#   bps, bps_rd, bps_wr, iops, iops_rd, iops_wr,
#   bps_max, bps_rd_max, bps_wr_max, iops_max, iops_rd_max, iops_wr_max,
#   bps_max_length, bps_rd_max_length, bps_wr_max_length,
#   iops_max_length, iops_rd_max_length, iops_wr_max_length
# Every limit is converted with int() before it is sent.
sub qemu_block_set_io_throttle {
    my ($vmid, $deviceid, @limits) = @_;

    return if !check_running($vmid);

    # command argument names, in the same order as the positional limits
    my @limit_names = qw(
        bps bps_rd bps_wr
        iops iops_rd iops_wr
        bps_max bps_rd_max bps_wr_max
        iops_max iops_rd_max iops_wr_max
        bps_max_length bps_rd_max_length bps_wr_max_length
        iops_max_length iops_rd_max_length iops_wr_max_length
    );

    my @arguments = map { ($limit_names[$_], int($limits[$_])) } 0 .. $#limit_names;

    return mon_cmd($vmid, "block_set_io_throttle", device => $deviceid, @arguments);
}
4647
# Resize volume $volid through the storage layer and, when the VM is
# running, tell QEMU about the new size of block device $deviceid.
#
# The size sent to QEMU is rounded up to a multiple of 1024; it is 0 when
# PVE::Storage::volume_resize() returned a false value.
sub qemu_block_resize {
    my ($vmid, $deviceid, $storecfg, $volid, $size) = @_;

    my $running = check_running($vmid);

    my $resized = PVE::Storage::volume_resize($storecfg, $volid, $size, $running);
    $size = 0 if !$resized;

    return if !$running;

    # round up to the next multiple of 1024
    $size += (1024 - $size % 1024) % 1024;

    my %arguments = (
        device => $deviceid,
        size => int($size),
        timeout => 60,
    );

    return mon_cmd($vmid, "block_resize", %arguments);
}
4668
# Create snapshot $snap of volume $volid: through QEMU
# ('blockdev-snapshot-internal-sync') when the VM is running and
# do_snapshots_with_qemu() agrees, through the storage layer otherwise.
sub qemu_volume_snapshot {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $via_qemu = check_running($vmid) && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    if (!$via_qemu) {
        PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
    } else {
        mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap);
    }
}
4680
# Delete snapshot $snap of volume $volid.
#
# The volume only counts as "running" when the VM is running and its current
# configuration references $volid. In that case, and if
# do_snapshots_with_qemu() agrees, the snapshot is deleted through QEMU;
# otherwise through the storage layer, which is told whether the volume is
# in use.
sub qemu_volume_snapshot_delete {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $in_use = check_running($vmid);

    if ($in_use) {
        # a running VM is not enough - the volume must be part of its current config
        $in_use = undef;

        my $conf = PVE::QemuConfig->load_config($vmid);
        my $mark_if_used = sub {
            my ($ds, $drive) = @_;
            $in_use = 1 if $drive->{file} eq $volid;
        };
        PVE::QemuConfig->foreach_volume($conf, $mark_if_used);
    }

    my $via_qemu = $in_use && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    if (!$via_qemu) {
        PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $in_use);
    } else {
        mon_cmd($vmid, 'blockdev-snapshot-delete-internal-sync', device => $deviceid, name => $snap);
    }
}
4702
# Set the state of every migration capability the running QEMU reports.
#
# Capabilities enabled: 'auto-converge', 'xbzrle' and - if
# 'query-proxmox-support' reports the matching property (the savevm variant
# when $savevm is set, the migration variant otherwise) - 'dirty-bitmaps'.
# All other reported capabilities are switched off.
sub set_migration_caps {
    my ($vmid, $savevm) = @_;

    # failures of this query are ignored, which leaves dirty-bitmaps disabled
    my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };

    my $bitmap_prop = $savevm ? 'pbs-dirty-bitmap-savevm' : 'pbs-dirty-bitmap-migration';
    my $dirty_bitmaps = $qemu_support->{$bitmap_prop} ? 1 : 0;

    my $enabled_cap = {
        "auto-converge" => 1,
        "xbzrle" => 1,
        "x-rdma-pin-all" => 0,
        "zero-blocks" => 0,
        "compress" => 0,
        "dirty-bitmaps" => $dirty_bitmaps,
    };

    my $supported_capabilities = mon_cmd($vmid, "query-migrate-capabilities");

    my $cap_ref = [
        map {
            my $name = $_->{capability};
            +{
                capability => $name,
                state => $enabled_cap->{$name} ? JSON::true : JSON::false,
            };
        } @$supported_capabilities
    ];

    return mon_cmd($vmid, "migrate-set-capabilities", capabilities => $cap_ref);
}
4733
# Call $func->($volid, $attributes, @param) once for every volume ID
# referenced by $conf or by any of its snapshots (including 'vmstate' and
# unused volumes).
#
# $attributes is a hash with the keys cdrom, replicate, shared,
# referenced_in_config, is_vmstate, is_tpmstate and is_unused, plus - where
# applicable - referenced_in_snapshot (hash keyed by snapshot name), size and
# drivename. The flags are accumulated over all places referencing the volume.
sub foreach_volid {
    my ($conf, $func, @param) = @_;

    my $volhash = {};

    my $collect = sub {
        my ($key, $drive, $snapname) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $info = ($volhash->{$volid} //= {});

        # only a cdrom if every reference is one
        $info->{cdrom} //= 1;
        $info->{cdrom} = 0 if !drive_is_cdrom($drive);

        # replicated if any reference has replication enabled (the default)
        $info->{replicate} //= 0;
        $info->{replicate} = 1 if $drive->{replicate} // 1;

        $info->{shared} //= 0;
        $info->{shared} = 1 if $drive->{shared};

        $info->{referenced_in_config} //= 0;
        if (defined($snapname)) {
            $info->{referenced_in_snapshot}->{$snapname} = 1;
        } else {
            $info->{referenced_in_config} = 1;
        }

        # the first true size seen wins
        my $size = $drive->{size};
        $info->{size} //= $size if $size;

        $info->{is_vmstate} //= 0;
        $info->{is_vmstate} = 1 if $key eq 'vmstate';

        $info->{is_tpmstate} //= 0;
        $info->{is_tpmstate} = 1 if $key eq 'tpmstate0';

        $info->{is_unused} //= 0;
        $info->{is_unused} = 1 if $key =~ /^unused\d+$/;

        $info->{drivename} = $key if is_valid_drivename($key);
    };

    my $include_opts = {
        extra_keys => ['vmstate'],
        include_unused => 1,
    };

    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $collect);
    for my $snapname (keys %{$conf->{snapshots}}) {
        PVE::QemuConfig->foreach_volume_full(
            $conf->{snapshots}->{$snapname}, $include_opts, $collect, $snapname);
    }

    for my $volid (keys %$volhash) {
        $func->($volid, $volhash->{$volid}, @param);
    }
}
4791
# Options that vmconfig_hotplug_pending copies from the pending section
# straight into the configuration, without any hotplug action.
my $fast_plug_option = {
    map { $_ => 1 } qw(
        lock
        name
        onboot
        shares
        startup
        description
        protection
        vmstatestorage
        hookscript
        tags
    )
};
4804
# hotplug changes in [PENDING]
# $selection hash can be used to only apply specified options, for
# example: { cores => 1 } (only apply changed 'cores')
# $errors ref is used to return error messages
#
# Every pending deletion and every pending value is applied inside its own
# eval: dying with "skip\n" leaves the option pending without reporting an
# error, any other error is stored in $errors->{$opt}, and on success the
# option is moved from the pending section into the active configuration.
# The configuration is written back at the end.
sub vmconfig_hotplug_pending {
    my ($vmid, $conf, $storecfg, $selection, $errors) = @_;

    my $defaults = load_defaults();
    my $arch = get_vm_arch($conf);
    my $machine_type = get_vm_machine($conf, undef, $arch);

    my $add_error = sub {
        my ($opt, $msg) = @_;
        $errors->{$opt} = "hotplug problem - $msg";
    };

    # Plug/unplug the tablet (plus the keyboard on aarch64). Neither device has a
    # parsed device configuration, so undef is passed for vm_deviceplug's $device
    # parameter - leaving it out would shift $arch and $machine_type one position
    # to the left in the callee.
    my $plug_tablet = sub {
        vm_deviceplug($storecfg, $conf, $vmid, 'tablet', undef, $arch, $machine_type);
        vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', undef, $arch, $machine_type)
            if $arch eq 'aarch64';
    };
    my $unplug_tablet = sub {
        vm_deviceunplug($vmid, $conf, 'tablet');
        vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
    };

    # commit values which do not have any impact on running VM first
    # Note: those options cannot raise errors, so we do not care about
    # $selection and always apply them.
    my $changes = 0;
    foreach my $opt (keys %{$conf->{pending}}) { # add/change
        if ($fast_plug_option->{$opt}) {
            $conf->{$opt} = $conf->{pending}->{$opt};
            delete $conf->{pending}->{$opt};
            $changes = 1;
        }
    }

    if ($changes) {
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');

    my $cgroup = PVE::QemuServer::CGroup->new($vmid);
    my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    foreach my $opt (sort keys %$pending_delete_hash) {
        next if $selection && !$selection->{$opt};
        my $force = $pending_delete_hash->{$opt}->{force};
        eval {
            if ($opt eq 'hotplug') {
                # '// ""' avoids an uninitialized-value warning for an unset option
                die "skip\n" if (($conf->{hotplug} // '') =~ /memory/);
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                # deleting the option means falling back to the default
                if ($defaults->{tablet}) {
                    $plug_tablet->();
                } else {
                    $unplug_tablet->();
                }
            } elsif ($opt =~ m/^usb\d+/) {
                die "skip\n";
                # since we cannot reliably hot unplug usb devices we are disabling it
                #die "skip\n" if !$hotplug_features->{usb} || $conf->{$opt} =~ m/spice/i;
                #vm_deviceunplug($vmid, $conf, $opt);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, undef);
            } elsif ($opt eq 'balloon') {
                # enable balloon device is not hotpluggable
                die "skip\n" if defined($conf->{balloon}) && $conf->{balloon} == 0;
                # here we reset the ballooning value to memory
                my $balloon = $conf->{memory} || $defaults->{memory};
                mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
            } elsif ($fast_plug_option->{$opt}) {
                # do nothing
            } elsif ($opt =~ m/^net(\d+)$/) {
                die "skip\n" if !$hotplug_features->{network};
                vm_deviceunplug($vmid, $conf, $opt);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if !$hotplug_features->{disk} || $opt =~ m/(ide|sata)(\d+)/;
                vm_deviceunplug($vmid, $conf, $opt);
                vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
            } elsif ($opt =~ m/^memory$/) {
                die "skip\n" if !$hotplug_features->{memory};
                PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt);
            } elsif ($opt eq 'cpuunits') {
                $cgroup->change_cpu_shares(undef, 1024);
            } elsif ($opt eq 'cpulimit') {
                $cgroup->change_cpu_quota(undef, undef); # reset, cgroup module can better decide values
            } else {
                die "skip\n";
            }
        };
        if (my $err = $@) {
            $add_error->($opt, $err) if $err ne "skip\n";
        } else {
            delete $conf->{$opt};
            PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        }
    }

    # Applies all pending cloud-init option changes/deletions to $conf and
    # regenerates the cloud-init config, using $value for the drive option $key.
    my ($apply_pending_cloudinit, $apply_pending_cloudinit_done);
    $apply_pending_cloudinit = sub {
        return if $apply_pending_cloudinit_done; # once is enough
        $apply_pending_cloudinit_done = 1; # once is enough

        my ($key, $value) = @_;

        my @cloudinit_opts = keys %$confdesc_cloudinit;
        foreach my $opt (keys %{$conf->{pending}}) {
            next if !grep { $_ eq $opt } @cloudinit_opts;
            $conf->{$opt} = delete $conf->{pending}->{$opt};
        }

        my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
        foreach my $opt (sort keys %$pending_delete_hash) {
            next if !grep { $_ eq $opt } @cloudinit_opts;
            PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
            delete $conf->{$opt};
        }

        my $new_conf = { %$conf };
        $new_conf->{$key} = $value;
        PVE::QemuServer::Cloudinit::generate_cloudinitconfig($new_conf, $vmid);
    };

    foreach my $opt (keys %{$conf->{pending}}) {
        next if $selection && !$selection->{$opt};
        my $value = $conf->{pending}->{$opt};
        eval {
            if ($opt eq 'hotplug') {
                # switching memory hotplug on or off cannot be applied to a running VM;
                # '// ""' avoids an uninitialized-value warning for an unset option
                die "skip\n" if ($value =~ /memory/)
                    || ($value !~ /memory/ && ($conf->{hotplug} // '') =~ /memory/);
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                if ($value == 1) {
                    $plug_tablet->();
                } elsif ($value == 0) {
                    $unplug_tablet->();
                }
            } elsif ($opt =~ m/^usb\d+$/) {
                die "skip\n";
                # since we cannot reliably hot unplug usb devices we disable it for now
                #die "skip\n" if !$hotplug_features->{usb} || $value =~ m/spice/i;
                #my $d = eval { parse_property_string($usbdesc->{format}, $value) };
                #die "skip\n" if !$d;
                #qemu_usb_hotplug($storecfg, $conf, $vmid, $opt, $d, $arch, $machine_type);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, $value);
            } elsif ($opt eq 'balloon') {
                # enable/disable ballooning device is not hotpluggable
                my $old_balloon_enabled = !!(!defined($conf->{balloon}) || $conf->{balloon});
                my $new_balloon_enabled = !!(!defined($conf->{pending}->{balloon}) || $conf->{pending}->{balloon});
                die "skip\n" if $old_balloon_enabled != $new_balloon_enabled;

                # allow manual ballooning if shares is set to zero
                if ((defined($conf->{shares}) && ($conf->{shares} == 0))) {
                    my $balloon = $conf->{pending}->{balloon} || $conf->{memory} || $defaults->{memory};
                    mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
                }
            } elsif ($opt =~ m/^net(\d+)$/) {
                # some changes can be done without hotplug
                vmconfig_update_net($storecfg, $conf, $hotplug_features->{network},
                                    $vmid, $opt, $value, $arch, $machine_type);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if $opt eq 'efidisk0' || $opt eq 'tpmstate0';
                # some changes can be done without hotplug
                my $drive = parse_drive($opt, $value);
                if (drive_is_cloudinit($drive)) {
                    $apply_pending_cloudinit->($opt, $value);
                }
                vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
                                     $vmid, $opt, $value, $arch, $machine_type);
            } elsif ($opt =~ m/^memory$/) { #dimms
                die "skip\n" if !$hotplug_features->{memory};
                $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt, $value);
            } elsif ($opt eq 'cpuunits') {
                my $new_cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{pending}->{$opt}); #clamp
                $cgroup->change_cpu_shares($new_cpuunits, 1024);
            } elsif ($opt eq 'cpulimit') {
                # a limit of 0 maps to a quota of -1
                my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000);
                $cgroup->change_cpu_quota($cpulimit, 100000);
            } elsif ($opt eq 'agent') {
                vmconfig_update_agent($conf, $opt, $value);
            } else {
                die "skip\n"; # skip non-hot-pluggable options
            }
        };
        if (my $err = $@) {
            $add_error->($opt, $err) if $err ne "skip\n";
        } else {
            $conf->{$opt} = $value;
            delete $conf->{pending}->{$opt};
        }
    }

    PVE::QemuConfig->write_config($vmid, $conf);
}
5000
# Free the volume behind a drive entry if that is allowed.
#
# Only acts when $force is set or $key names an 'unused' entry, and never on
# CD-ROM drives. Returns 1 when the entry may be dropped from the config
# (volume freed, or the VM does not own the volume), and nothing otherwise.
# Dies if the permission check fails or the volume is still in use.
sub try_deallocate_drive {
    my ($storecfg, $vmid, $conf, $key, $drive, $rpcenv, $authuser, $force) = @_;

    # nothing to do unless forced or the entry is an 'unusedN' one
    return if !$force && $key !~ /^unused/;
    return if drive_is_cdrom($drive, 1);

    my $volid = $drive->{file};

    # If vm is not owner of this disk remove from config
    return 1 if !vm_is_volid_owner($storecfg, $vmid, $volid);

    # scalar context on purpose: only the storage ID is needed here
    my $sid = PVE::Storage::parse_volume_id($volid);
    $rpcenv->check($authuser, "/storage/$sid", ['Datastore.AllocateSpace']);

    # check if the disk is really unused
    if (PVE::QemuServer::Drive::is_volume_in_use($storecfg, $conf, $key, $volid)) {
        die "unable to delete '$volid' - volume is still in use (snapshot?)\n";
    }

    PVE::Storage::vdisk_free($storecfg, $volid);
    return 1;
}
5023
# Handle removal of drive option $opt from $conf.
#
# Without $force the drive is registered as an unused volume; with $force the
# caller needs 'VM.Config.Disk' on the VM and the volume is deallocated via
# try_deallocate_drive().
sub vmconfig_delete_or_detach_drive {
    my ($vmid, $storecfg, $conf, $opt, $force) = @_;

    my $drive = parse_drive($opt, $conf->{$opt});

    my $rpcenv = PVE::RPCEnvironment::get();
    my $authuser = $rpcenv->get_user();

    # plain detach: keep the volume around as 'unusedN'
    return vmconfig_register_unused_drive($storecfg, $vmid, $conf, $drive) if !$force;

    $rpcenv->check_vm_perm($authuser, $vmid, undef, ['VM.Config.Disk']);
    return try_deallocate_drive($storecfg, $vmid, $conf, $opt, $drive, $rpcenv, $authuser, $force);
}
5039
5040
5041
# Apply all pending changes of a stopped VM ("cold plug") and write the config.
#
# First processes the pending-delete list, then moves every remaining pending
# option into the active config. Failures for a single option are recorded in
# the optional $errors hash (keyed by option name) and emitted as warnings; the
# affected option then stays pending.
sub vmconfig_apply_pending {
    my ($vmid, $conf, $storecfg, $errors) = @_;

    return if !scalar(keys %{$conf->{pending}});

    my $record_failure = sub {
        my ($opt, $msg) = @_;
        my $text = "unable to apply pending change $opt : $msg";
        $errors->{$opt} = $text;
        warn $text;
    };

    # cold plug

    # pass 1: pending deletions
    my $to_delete = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    for my $opt (sort keys %$to_delete) {
        my $force = $to_delete->{$opt}->{force};
        eval {
            die "internal error" if $opt =~ m/^unused/;

            vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force)
                if defined($conf->{$opt}) && is_valid_drivename($opt);
        };
        my $err = $@;
        if ($err) {
            $record_failure->($opt, $err);
            next;
        }

        PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        delete $conf->{$opt};
    }

    PVE::QemuConfig->cleanup_pending($conf);

    # pass 2: additions and changes
    for my $opt (keys %{$conf->{pending}}) {
        next if $opt eq 'delete'; # just to be sure

        eval {
            # a drive that gets replaced must not lose track of its old volume
            if (defined($conf->{$opt}) && is_valid_drivename($opt)) {
                my $replaced = parse_drive($opt, $conf->{$opt});
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $replaced);
            }
        };
        my $err = $@;
        if ($err) {
            $record_failure->($opt, $err);
            next;
        }

        $conf->{$opt} = delete $conf->{pending}->{$opt};
    }

    # write all changes at once to avoid unnecessary i/o
    PVE::QemuConfig->write_config($vmid, $conf);
}
5093
# Apply a changed netN option to a running VM.
#
# Changes to bridge, VLAN tag, trunks, firewall, rate or link state are applied
# to the existing tap device and the sub returns 1. Changes to model, MAC
# address, queue count or a switch between bridged and non-bridged mode need
# the NIC to be unplugged and plugged again, which dies with "skip\n" unless
# $hotplug is set. A NIC that does not exist yet is hot-plugged (again only if
# $hotplug is set).
sub vmconfig_update_net {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $newnet = parse_net($value);

    if ($conf->{$opt}) {
        my $oldnet = parse_net($conf->{$opt});

        my $needs_replug =
            safe_string_ne($oldnet->{model}, $newnet->{model})
            || safe_string_ne($oldnet->{macaddr}, $newnet->{macaddr})
            || safe_num_ne($oldnet->{queues}, $newnet->{queues})
            || !($newnet->{bridge} && $oldnet->{bridge}); # bridge/nat mode change

        if (!$needs_replug) {
            $opt =~ m/net(\d+)/ or die "internal error";
            my $iface = "tap${vmid}i$1";

            my $tap_settings_changed =
                safe_string_ne($oldnet->{bridge}, $newnet->{bridge})
                || safe_num_ne($oldnet->{tag}, $newnet->{tag})
                || safe_string_ne($oldnet->{trunks}, $newnet->{trunks})
                || safe_num_ne($oldnet->{firewall}, $newnet->{firewall});

            if ($tap_settings_changed) {
                PVE::Network::tap_unplug($iface);

                my @plug_args = (
                    $iface,
                    $newnet->{bridge},
                    $newnet->{tag},
                    $newnet->{firewall},
                    $newnet->{trunks},
                    $newnet->{rate},
                );
                if ($have_sdn) {
                    PVE::Network::SDN::Zones::tap_plug(@plug_args);
                } else {
                    PVE::Network::tap_plug(@plug_args);
                }
            } elsif (safe_num_ne($oldnet->{rate}, $newnet->{rate})) {
                # Rate can be applied on its own but any change above needs to
                # include the rate in tap_plug since OVS resets everything.
                PVE::Network::tap_rate_limit($iface, $newnet->{rate});
            }

            qemu_set_link_status($vmid, $opt, !$newnet->{link_down})
                if safe_string_ne($oldnet->{link_down}, $newnet->{link_down});

            return 1;
        }

        # for non online change, we try to hot-unplug
        die "skip\n" if !$hotplug;
        vm_deviceunplug($vmid, $conf, $opt);
    }

    die "skip\n" if !$hotplug;
    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet, $arch, $machine_type);
}
5146
# Decide whether a change of the 'agent' option can be applied to a running VM.
#
# Dies with "skip\n" if the option is not set in the current config or if any
# sub-option other than the hot-pluggable ones differs between old and new
# value. Returns nothing when the change can simply be taken over.
sub vmconfig_update_agent {
    my ($conf, $opt, $value) = @_;

    die "skip\n" if !$conf->{$opt};

    # sub-options that may change without restarting the VM
    my %live_changeable = (fstrim_cloned_disks => 1);

    my $current = parse_guest_agent($conf);
    my $wanted = parse_guest_agent({$opt => $value});

    # every non-hotpluggable key of $from must have the same value in $to
    my $require_same = sub {
        my ($from, $to) = @_;
        for my $name (keys %$from) {
            next if defined($live_changeable{$name});
            die "skip\n" if safe_string_ne($from->{$name}, $to->{$name});
        }
    };

    $require_same->($wanted, $current); # added/changed options
    $require_same->($current, $wanted); # removed options

    return; # either no actual change (e.g., format string reordered) or just hotpluggable changes
}
5169
# Apply a changed drive option to a running VM.
#
# - CD-ROM drives: eject and, if a new medium is given, insert it; returns 1.
# - Same volume as before: dies with "skip\n" if a non-hotpluggable property
#   changed, otherwise updates the I/O throttle limits if needed; returns 1.
# - Different volume: unplugs the old disk, registers it as unused and
#   hot-plugs the new one (needs $hotplug).
# - New drive: hot-plugs it (needs $hotplug; not done for ide/sata).
# Dies with "unable to change media type\n" if the media type differs.
sub vmconfig_update_disk {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $drive = parse_drive($opt, $value);

    my $old_drive = $conf->{$opt} ? parse_drive($opt, $conf->{$opt}) : undef;

    if ($old_drive) {
        my $media = $drive->{media} || 'disk';
        my $oldmedia = $old_drive->{media} || 'disk';
        die "unable to change media type\n" if $media ne $oldmedia;

        if (drive_is_cdrom($old_drive)) {
            if ($drive->{file} ne 'none') {
                my $path = get_iso_path($storecfg, $vmid, $drive->{file});

                # force eject if locked
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");

                mon_cmd($vmid, "blockdev-change-medium", id => "$opt", filename => "$path")
                    if $path;
            } else {
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive)
                    if drive_is_cloudinit($old_drive);
            }

            return 1;
        }

        if ($drive->{file} eq $old_drive->{file}) {
            # update existing disk

            # skip non hotpluggable value
            for my $prop (qw(discard iothread queues cache ssd)) {
                die "skip\n" if safe_string_ne($drive->{$prop}, $old_drive->{$prop});
            }

            # apply throttle
            my @throttle_props = qw(
                mbps mbps_rd mbps_wr
                iops iops_rd iops_wr
                mbps_max mbps_rd_max mbps_wr_max
                iops_max iops_rd_max iops_wr_max
                bps_max_length bps_rd_max_length bps_wr_max_length
                iops_max_length iops_rd_max_length iops_wr_max_length
            );

            my $throttle_changed = 0;
            for my $prop (@throttle_props) {
                next if !safe_num_ne($drive->{$prop}, $old_drive->{$prop});
                $throttle_changed = 1;
                last;
            }

            if ($throttle_changed) {
                # MiB/s values are passed on as bytes/s, unset limits as 0,
                # unset burst lengths as 1
                my $as_bytes = sub { return ($drive->{$_[0]} || 0)*1024*1024; };
                my $as_count = sub { return $drive->{$_[0]} || 0; };
                my $as_length = sub { return $drive->{$_[0]} || 1; };

                qemu_block_set_io_throttle(
                    $vmid,
                    "drive-$opt",
                    $as_bytes->('mbps'),
                    $as_bytes->('mbps_rd'),
                    $as_bytes->('mbps_wr'),
                    $as_count->('iops'),
                    $as_count->('iops_rd'),
                    $as_count->('iops_wr'),
                    $as_bytes->('mbps_max'),
                    $as_bytes->('mbps_rd_max'),
                    $as_bytes->('mbps_wr_max'),
                    $as_count->('iops_max'),
                    $as_count->('iops_rd_max'),
                    $as_count->('iops_wr_max'),
                    $as_length->('bps_max_length'),
                    $as_length->('bps_rd_max_length'),
                    $as_length->('bps_wr_max_length'),
                    $as_length->('iops_max_length'),
                    $as_length->('iops_rd_max_length'),
                    $as_length->('iops_wr_max_length'),
                );
            }

            return 1;
        }

        # a different volume was configured: unplug and register as unused
        die "skip\n" if !$hotplug;
        vm_deviceunplug($vmid, $conf, $opt);
        vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
    }

    die "skip\n" if !$hotplug || $opt =~ m/(ide|sata)(\d+)/;

    # hotplug new disks
    PVE::Storage::activate_volumes($storecfg, [$drive->{file}])
        if $drive->{file} !~ m|^/dev/.+|;
    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive, $arch, $machine_type);
}
5277
# called in locked context by incoming migration
#
# Collects the volumes of $conf that live on non-shared storage, skipping
# CD-ROM drives, tpmstate0 and drives without a volume. Returns a hash
# reference mapping the drive key to
#   [$volid, $storeid, $volname, $drive, $use_existing]
# where $use_existing is 1 for volumes listed in $replicated_volumes.
sub vm_migrate_get_nbd_disks {
    my ($storecfg, $conf, $replicated_volumes) = @_;

    my $local_volumes = {};

    my $collect = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive) || $ds eq 'tpmstate0';

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid);

        # volumes on shared storage are reachable from the target already
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        return if $scfg->{shared};

        # replicated disks re-use existing state via bitmap
        my $use_existing = $replicated_volumes->{$volid} ? 1 : 0;

        $local_volumes->{$ds} = [$volid, $storeid, $volname, $drive, $use_existing];
    };

    PVE::QemuConfig->foreach_volume($conf, $collect);

    return $local_volumes;
}
5304
# called in locked context by incoming migration
#
# Allocates the target volumes for an NBD based storage migration.
#
# $source_volumes maps drive keys to
#   [$volid, $storeid, $volname, $drive, $use_existing, $format]
# (see vm_migrate_get_nbd_disks; $format is optional). Volumes flagged with
# $use_existing are taken over as they are, for all others a new volume is
# allocated on the storage selected through $storagemap.
#
# Returns a hash reference mapping each drive key to a hash with 'drivestr',
# 'volid' and, for re-used volumes, 'replicated' => 1.
sub vm_migrate_alloc_nbd_disks {
    my ($storecfg, $vmid, $source_volumes, $storagemap) = @_;

    my $nbd = {};
    foreach my $opt (sort keys %$source_volumes) {
        my ($volid, $storeid, $volname, $drive, $use_existing, $format) = @{$source_volumes->{$opt}};

        if ($use_existing) {
            $nbd->{$opt}->{drivestr} = print_drive($drive);
            $nbd->{$opt}->{volid} = $volid;
            $nbd->{$opt}->{replicated} = 1;
            next;
        }

        # storage mapping + volname = regular migration
        # storage mapping + format = remote migration
        # order of precedence, filtered by whether storage supports it:
        # 1. explicit requested format
        # 2. format of current volume
        # 3. default format of storage
        if (!$storagemap->{identity}) {
            $storeid = PVE::JSONSchema::map_id($storagemap, $storeid);
            my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);

            my $is_supported = sub {
                my ($candidate) = @_;
                return scalar(grep { $candidate eq $_ } @$validFormats);
            };

            if (!$format || !$is_supported->($format)) {
                # Drop a requested format the target storage cannot handle,
                # otherwise it would survive the '//=' fallback below and be
                # passed on to vdisk_alloc.
                $format = undef;

                if ($volname) {
                    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
                    my $fileFormat = qemu_img_format($scfg, $volname);
                    $format = $fileFormat if $is_supported->($fileFormat);
                }
                $format //= $defFormat;
            }
        } else {
            # can't happen for remote migration, so $volname is always defined
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $format = qemu_img_format($scfg, $volname);
        }

        # NOTE(review): presumably $drive->{size} is in bytes and vdisk_alloc
        # expects KiB - confirm against PVE::Storage
        my $size = $drive->{size} / 1024;
        my $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $format, undef, $size);

        # not a copy: the drive hash passed in by the caller is updated in place
        my $newdrive = $drive;
        $newdrive->{format} = $format;
        $newdrive->{file} = $newvolid;

        $nbd->{$opt}->{drivestr} = print_drive($newdrive);
        $nbd->{$opt}->{volid} = $newvolid;
    }

    return $nbd;
}
5356
# Start a VM while holding its config lock.
#
# see vm_start_nolock for parameters, additionally:
# migrate_opts:
#   storagemap = parsed storage map for allocating NBD disks
#
# If the VM is already running and carries a 'backup' lock, it is resumed in
# the existing QEMU instance instead of being started.
sub vm_start {
    my ($storecfg, $vmid, $params, $migrate_opts) = @_;

    my $start_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid, $migrate_opts->{migratedfrom});

        if (!$params->{skiptemplate} && PVE::QemuConfig->is_template($conf)) {
            die "you can't start a vm if it's a template\n";
        }

        my $has_suspended_lock = PVE::QemuConfig->has_lock($conf, 'suspended');
        my $has_backup_lock = PVE::QemuConfig->has_lock($conf, 'backup');

        my $running = check_running($vmid, undef, $migrate_opts->{migratedfrom});

        # a backup is currently running, attempt to start the guest in the
        # existing QEMU instance
        return vm_resume($vmid) if $has_backup_lock && $running;

        my $skip_lock_check = $params->{skiplock} || $has_suspended_lock;
        PVE::QemuConfig->check_lock($conf) if !$skip_lock_check;

        $params->{resume} = $has_suspended_lock || defined($conf->{vmstate});

        die "VM $vmid already running\n" if $running;

        my $storagemap = $migrate_opts->{storagemap};
        if ($storagemap) {
            my $replicated = $migrate_opts->{replicated_volumes};
            my $disks = vm_migrate_get_nbd_disks($storecfg, $conf, $replicated);
            $migrate_opts->{nbd} = vm_migrate_alloc_nbd_disks($storecfg, $vmid, $disks, $storagemap);

            # point the in-memory config at the volumes used on this node
            for my $opt (keys %{$migrate_opts->{nbd}}) {
                $conf->{$opt} = $migrate_opts->{nbd}->{$opt}->{drivestr};
            }
        }

        return vm_start_nolock($storecfg, $vmid, $conf, $params, $migrate_opts);
    };

    return PVE::QemuConfig->lock_config($vmid, $start_locked);
}
5400
5401
# Start the QEMU process for a VM. Does not take the config lock itself;
# vm_start() calls it with the lock held.
#
# params:
#   statefile => 'tcp', 'unix' for migration or path/volid for RAM state
#   skiplock => 0/1, skip checking for config lock
#   skiptemplate => 0/1, skip checking whether VM is template
#   forcemachine => to force Qemu machine (rollback/migration)
#   forcecpu => a QEMU '-cpu' argument string to override get_cpu_options
#   timeout => in seconds
#   paused => start VM in paused state (backup)
#   resume => resume from hibernation
#   pbs-backing => {
#       sata0 => {
#           repository
#           snapshot
#           keyfile
#           archive
#       },
#       virtio2 => ...
#   }
# migrate_opts:
#   nbd => volumes for NBD exports (vm_migrate_alloc_nbd_disks)
#   migratedfrom => source node
#   spice_ticket => used for spice migration, passed via tunnel/stdin
#   network => CIDR of migration network
#   type => secure/insecure - tunnel over encrypted connection or plain-text
#   nbd_proto_version => int, 0 for TCP, 1 for UNIX
#   replicated_volumes => which volids should be re-used with bitmaps for nbd migration
#   offline_volumes => new volids of offline migrated disks like tpmstate and cloudinit, not yet
#       contained in config
#
# Returns a hash reference that may contain 'migrate_uri', 'migrate_storage_uri',
# 'drives' (NBD export info per drive) and 'spice_port'.
# Dies with "start failed: ..." if QEMU could not be started.
sub vm_start_nolock {
    my ($storecfg, $vmid, $conf, $params, $migrate_opts) = @_;

    my $statefile = $params->{statefile};
    my $resume = $params->{resume};

    my $migratedfrom = $migrate_opts->{migratedfrom};
    my $migration_type = $migrate_opts->{type};

    my $res = {};

    # clean up leftover reboot request files
    eval { clear_reboot_request($vmid); };
    warn $@ if $@;

    if (!$statefile && scalar(keys %{$conf->{pending}})) {
        vmconfig_apply_pending($vmid, $conf, $storecfg);
        $conf = PVE::QemuConfig->load_config($vmid); # update/reload
    }

    # don't regenerate the ISO if the VM is started as part of a live migration
    # this way we can reuse the old ISO with the correct config
    PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid) if !$migratedfrom;

    # override offline migrated volumes, conf is out of date still
    if (my $offline_volumes = $migrate_opts->{offline_volumes}) {
        for my $key (sort keys $offline_volumes->%*) {
            my $parsed = parse_drive($key, $conf->{$key});
            $parsed->{file} = $offline_volumes->{$key};
            $conf->{$key} = print_drive($parsed);
        }
    }

    my $defaults = load_defaults();

    # set environment variable useful inside network script
    $ENV{PVE_MIGRATED_FROM} = $migratedfrom if $migratedfrom;

    PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1);

    my $forcemachine = $params->{forcemachine};
    my $forcecpu = $params->{forcecpu};
    if ($resume) {
        # enforce machine and CPU type on suspended vm to ensure HW compatibility
        $forcemachine = $conf->{runningmachine};
        $forcecpu = $conf->{runningcpu};
        print "Resuming suspended VM\n";
    }

    my ($cmd, $vollist, $spice_port) = config_to_command($storecfg, $vmid,
        $conf, $defaults, $forcemachine, $forcecpu, $params->{'pbs-backing'});

    # Lazily determine (and cache) the local IP used for migration traffic:
    # explicit migration network, else the one from datacenter.cfg, else the
    # node's cluster IP.
    my $migration_ip;
    my $get_migration_ip = sub {
        my ($nodename) = @_;

        return $migration_ip if defined($migration_ip);

        my $cidr = $migrate_opts->{network};

        if (!defined($cidr)) {
            my $dc_conf = PVE::Cluster::cfs_read_file('datacenter.cfg');
            $cidr = $dc_conf->{migration}->{network};
        }

        if (defined($cidr)) {
            my $ips = PVE::Network::get_local_ip_from_cidr($cidr);

            die "could not get IP: no address configured on local " .
                "node for network '$cidr'\n" if scalar(@$ips) == 0;

            die "could not get IP: multiple addresses configured on local " .
                "node for network '$cidr'\n" if scalar(@$ips) > 1;

            # exactly one address left at this point
            $migration_ip = $ips->[0];
        }

        $migration_ip = PVE::Cluster::remote_node_ip($nodename, 1)
            if !defined($migration_ip);

        return $migration_ip;
    };

    my $migrate_uri;
    if ($statefile) {
        if ($statefile eq 'tcp') {
            my $localip = "localhost";
            my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg');
            my $nodename = nodename();

            if (!defined($migration_type)) {
                if (defined($datacenterconf->{migration}->{type})) {
                    $migration_type = $datacenterconf->{migration}->{type};
                } else {
                    $migration_type = 'secure';
                }
            }

            if ($migration_type eq 'insecure') {
                $localip = $get_migration_ip->($nodename);
                $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
            }

            my $pfamily = PVE::Tools::get_host_address_family($nodename);
            my $migrate_port = PVE::Tools::next_migrate_port($pfamily);
            $migrate_uri = "tcp:${localip}:${migrate_port}";
            push @$cmd, '-incoming', $migrate_uri;
            push @$cmd, '-S';

        } elsif ($statefile eq 'unix') {
            # should be the default for secure migrations, as an SSH TCP forward
            # tunnel is not reliably ready in time and regularly fails to set
            # up, so use UNIX socket forwards
            my $socket_addr = "/run/qemu-server/$vmid.migrate";
            unlink $socket_addr;

            $migrate_uri = "unix:$socket_addr";

            push @$cmd, '-incoming', $migrate_uri;
            push @$cmd, '-S';

        } elsif (-e $statefile) {
            push @$cmd, '-loadstate', $statefile;
        } else {
            my $statepath = PVE::Storage::path($storecfg, $statefile);
            push @$vollist, $statefile;
            push @$cmd, '-loadstate', $statepath;
        }
    } elsif ($params->{paused}) {
        push @$cmd, '-S';
    }

    my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume);

    my $pci_devices = {}; # host pci devices
    for (my $i = 0; $i < $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES; $i++) {
        my $dev = $conf->{"hostpci$i"} or next;
        $pci_devices->{$i} = parse_hostpci($dev);
    }

    # do not reserve pciid for mediated devices, sysfs will error out for duplicate assignment
    my $real_pci_devices = [ grep { !(defined($_->{mdev}) && scalar($_->{pciid}->@*) == 1) } values $pci_devices->%* ];

    # map to a flat list of pci ids
    my $pci_id_list = [ map { $_->{id} } map { $_->{pciid}->@* } $real_pci_devices->@* ];

    # reserve all PCI IDs before actually doing anything with them
    PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, $start_timeout);

    eval {
        my $uuid;
        for my $id (sort keys %$pci_devices) {
            my $d = $pci_devices->{$id};
            for my $dev ($d->{pciid}->@*) {
                my $info = PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $id, $d->{mdev});

                # nvidia grid needs the uuid of the mdev as qemu parameter
                if ($d->{mdev} && !defined($uuid) && $info->{vendor} eq '10de') {
                    $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $id);
                }
            }
        }
        push @$cmd, '-uuid', $uuid if defined($uuid);
    };
    if (my $err = $@) {
        eval { PVE::QemuServer::PCI::remove_pci_reservation($pci_id_list) };
        warn $@ if $@;
        die $err;
    }

    PVE::Storage::activate_volumes($storecfg, $vollist);

    # best effort, a failure to stop a stale scope is deliberately ignored
    eval {
        run_command(['/bin/systemctl', 'stop', "$vmid.scope"], outfunc => sub{}, errfunc => sub{});
    };
    # Issues with the above 'stop' not being fully completed are extremely rare, a very low
    # timeout should be more than enough here...
    PVE::Systemd::wait_for_unit_removed("$vmid.scope", 20);

    my $cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{cpuunits});

    my %run_params = (
        timeout => $statefile ? undef : $start_timeout,
        umask => 0077,
        noerr => 1,
    );

    # when migrating, prefix QEMU output so other side can pick up any
    # errors that might occur and show the user
    if ($migratedfrom) {
        $run_params{quiet} = 1;
        $run_params{logfunc} = sub { print "QEMU: $_[0]\n" };
    }

    my %systemd_properties = (
        Slice => 'qemu.slice',
        KillMode => 'process',
        SendSIGKILL => 0,
        TimeoutStopUSec => ULONG_MAX, # infinity
    );

    if (PVE::CGroup::cgroup_mode() == 2) {
        $systemd_properties{CPUWeight} = $cpuunits;
    } else {
        $systemd_properties{CPUShares} = $cpuunits;
    }

    if (my $cpulimit = $conf->{cpulimit}) {
        $systemd_properties{CPUQuota} = int($cpulimit * 100);
    }
    $systemd_properties{timeout} = 10 if $statefile; # setting up the scope should be quick

    # runs in a forked child: enter the systemd scope, start swtpm if a TPM
    # state is configured, then launch QEMU
    my $run_qemu = sub {
        PVE::Tools::run_fork sub {
            PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);

            my $tpmpid;
            if (my $tpm = $conf->{tpmstate0}) {
                # start the TPM emulator so QEMU can connect on start
                $tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom);
            }

            my $exitcode = run_command($cmd, %run_params);
            if ($exitcode) {
                if ($tpmpid) {
                    warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n";
                    kill 'TERM', $tpmpid;
                }
                die "QEMU exited with code $exitcode\n";
            }
        };
    };

    if ($conf->{hugepages}) {

        my $code = sub {
            my $hugepages_topology = PVE::QemuServer::Memory::hugepages_topology($conf);
            my $hugepages_host_topology = PVE::QemuServer::Memory::hugepages_host_topology();

            PVE::QemuServer::Memory::hugepages_mount();
            PVE::QemuServer::Memory::hugepages_allocate($hugepages_topology, $hugepages_host_topology);

            eval { $run_qemu->() };
            if (my $err = $@) {
                PVE::QemuServer::Memory::hugepages_reset($hugepages_host_topology)
                    if !$conf->{keephugepages};
                die $err;
            }

            PVE::QemuServer::Memory::hugepages_pre_deallocate($hugepages_topology)
                if !$conf->{keephugepages};
        };
        eval { PVE::QemuServer::Memory::hugepages_update_locked($code); };

    } else {
        eval { $run_qemu->() };
    }

    if (my $err = $@) {
        # deactivate volumes if start fails; cleanup problems must not hide
        # the original error, so they are only reported as warnings
        eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
        warn $@ if $@;
        eval { PVE::QemuServer::PCI::remove_pci_reservation($pci_id_list) };
        warn $@ if $@;

        die "start failed: $err";
    }

    # re-reserve all PCI IDs now that we can know the actual VM PID
    my $pid = PVE::QemuServer::Helpers::vm_running_locally($vmid);
    eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, undef, $pid) };
    warn $@ if $@;

    print "migration listens on $migrate_uri\n" if $migrate_uri;
    $res->{migrate_uri} = $migrate_uri;

    if ($statefile && $statefile ne 'tcp' && $statefile ne 'unix') {
        eval { mon_cmd($vmid, "cont"); };
        warn $@ if $@;
    }

    #start nbd server for storage migration
    if (my $nbd = $migrate_opts->{nbd}) {
        my $nbd_protocol_version = $migrate_opts->{nbd_proto_version} // 0;

        my $migrate_storage_uri;
        # nbd_protocol_version > 0 for unix socket support
        # $migration_type is only defaulted on the 'tcp' statefile path above,
        # so it may still be undefined here - treat that as "not secure"
        if ($nbd_protocol_version > 0 && ($migration_type // '') eq 'secure') {
            my $socket_path = "/run/qemu-server/$vmid\_nbd.migrate";
            mon_cmd($vmid, "nbd-server-start", addr => { type => 'unix', data => { path => $socket_path } } );
            $migrate_storage_uri = "nbd:unix:$socket_path";
        } else {
            my $nodename = nodename();
            my $localip = $get_migration_ip->($nodename);
            my $pfamily = PVE::Tools::get_host_address_family($nodename);
            my $storage_migrate_port = PVE::Tools::next_migrate_port($pfamily);

            mon_cmd($vmid, "nbd-server-start", addr => {
                type => 'inet',
                data => {
                    host => "${localip}",
                    port => "${storage_migrate_port}",
                },
            });
            $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
            $migrate_storage_uri = "nbd:${localip}:${storage_migrate_port}";
        }

        $res->{migrate_storage_uri} = $migrate_storage_uri;

        foreach my $opt (sort keys %$nbd) {
            my $drivestr = $nbd->{$opt}->{drivestr};
            my $volid = $nbd->{$opt}->{volid};
            mon_cmd($vmid, "nbd-server-add", device => "drive-$opt", writable => JSON::true );
            my $nbd_uri = "$migrate_storage_uri:exportname=drive-$opt";
            print "storage migration listens on $nbd_uri volume:$drivestr\n";
            print "re-using replicated volume: $opt - $volid\n"
                if $nbd->{$opt}->{replicated};

            $res->{drives}->{$opt} = $nbd->{$opt};
            $res->{drives}->{$opt}->{nbd_uri} = $nbd_uri;
        }
    }

    if ($migratedfrom) {
        eval {
            set_migration_caps($vmid);
        };
        warn $@ if $@;

        if ($spice_port) {
            print "spice listens on port $spice_port\n";
            $res->{spice_port} = $spice_port;
            if ($migrate_opts->{spice_ticket}) {
                mon_cmd($vmid, "set_password", protocol => 'spice', password =>
                    $migrate_opts->{spice_ticket});
                mon_cmd($vmid, "expire_password", protocol => 'spice', time => "+30");
            }
        }

    } else {
        mon_cmd($vmid, "balloon", value => $conf->{balloon}*1024*1024)
            if !$statefile && $conf->{balloon};

        foreach my $opt (keys %$conf) {
            next if $opt !~ m/^net\d+$/;
            my $nicconf = parse_net($conf->{$opt});
            qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down};
        }
    }

    # only when the balloon device is enabled (option unset or non-zero)
    mon_cmd($vmid, 'qom-set',
        path => "machine/peripheral/balloon0",
        property => "guest-stats-polling-interval",
        value => 2) if (!defined($conf->{balloon}) || $conf->{balloon});

    if ($resume) {
        print "Resumed VM, removing state\n";
        if (my $vmstate = $conf->{vmstate}) {
            PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
            PVE::Storage::vdisk_free($storecfg, $vmstate);
        }
        delete $conf->@{qw(lock vmstate runningmachine runningcpu)};
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-start');

    return $res;
}
5798
# Return the QEMU command line for a VM as a single string.
#
# With $snapname the command line is generated from that snapshot's config,
# honouring the machine and CPU type recorded in the snapshot. Dies if the
# snapshot does not exist.
sub vm_commandline {
    my ($storecfg, $vmid, $snapname) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    my $forcemachine;
    my $forcecpu;

    if ($snapname) {
        my $snapshot = $conf->{snapshots}->{$snapname};
        defined($snapshot) or die "snapshot '$snapname' does not exist\n";

        # check for machine or CPU overrides in snapshot
        ($forcemachine, $forcecpu) = $snapshot->@{qw(runningmachine runningcpu)};

        # keep file digest for API
        $snapshot->{digest} = $conf->{digest};

        $conf = $snapshot;
    }

    my $defaults = load_defaults();

    # scalar context on purpose: only the command array is wanted
    my $cmd = config_to_command($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu);

    return PVE::Tools::cmd2string($cmd);
}
5824
# Send a 'system_reset' to the VM while holding its config lock. The check for
# a lock entry in the config is skipped when $skiplock is set.
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $reset_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid);

        if (!$skiplock) {
            PVE::QemuConfig->check_lock($conf);
        }

        mon_cmd($vmid, "system_reset");
    };

    return PVE::QemuConfig->lock_config($vmid, $reset_locked);
}
5837
# Return an array reference with all volume IDs referenced by $conf, leaving
# out absolute paths and anything that does not parse as a storage volume ID.
sub get_vm_volumes {
    my ($conf) = @_;

    my @volids;

    my $collect = sub {
        my ($volid, $attr) = @_;

        # absolute paths are not storage volumes
        return if $volid =~ m|^/|;

        my ($sid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        push @volids, $volid if $sid;

        return;
    };

    foreach_volid($conf, $collect);

    return \@volids;
}
5855
# Clean up after a VM has stopped: deactivate its volumes (unless $keepActive),
# remove runtime files and the ivshmem backing file, clean up mediated devices
# and PCI reservations and, if $apply_pending_changes is set, apply pending
# config changes. Errors are only reported as a warning.
sub vm_stop_cleanup {
    my ($storecfg, $vmid, $conf, $keepActive, $apply_pending_changes) = @_;

    eval {
        if (!$keepActive) {
            PVE::Storage::deactivate_volumes($storecfg, get_vm_volumes($conf));

            my $tpmdrive = $conf->{tpmstate0};
            if ($tpmdrive) {
                my $tpm = parse_drive("tpmstate0", $tpmdrive);
                my ($storeid, $volname) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
                PVE::Storage::unmap_volume($storecfg, $tpm->{file}) if $storeid;
            }
        }

        # runtime files of the QEMU instance
        unlink map { "/var/run/qemu-server/${vmid}.$_" } qw(mon qmp pid vnc qga);

        if ($conf->{ivshmem}) {
            my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
            # just delete it for now, VMs which have this already open are not
            # affected, but new VMs will get a separate one. If this
            # becomes an issue we either add some sort of ref-counting or just
            # add a "don't delete on stop" flag to the ivshmem format.
            my $shm_name = $ivshmem->{name} // $vmid;
            unlink '/dev/shm/pve-shm-' . $shm_name;
        }

        my $ids = [];
        for my $key (keys %$conf) {
            my ($hostpciindex) = $key =~ m/^hostpci(\d+)$/ or next;

            my $d = parse_hostpci($conf->{$key});
            my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $hostpciindex);

            for my $pci (@{$d->{pciid}}) {
                my $pciid = $pci->{id};
                push @$ids, $pciid;
                PVE::SysFSTools::pci_cleanup_mdev_device($pciid, $uuid);
            }
        }
        PVE::QemuServer::PCI::remove_pci_reservation($ids);

        if ($apply_pending_changes) {
            vmconfig_apply_pending($vmid, $conf, $storecfg);
        }
    };
    warn $@ if $@; # avoid errors - just warn
}
5906
# Stops (or shuts down) a running VM and cleans up afterwards.
# call only in locked context
#
# $storecfg: storage configuration, handed to vm_stop_cleanup()
# $vmid: ID of the VM
# $skiplock: do not check the lock in the VM configuration
# $nocheck: do not load the VM configuration (also forwarded to
#     check_running()); without a configuration neither the 'pre-stop'
#     hookscript nor vm_stop_cleanup() is run
# $timeout: seconds to wait for the VM to exit; if undefined, the 'down' value
#     of the 'startup' option is used for shutdowns, else 60
# $shutdown: request a guest shutdown/powerdown instead of sending 'quit'
# $force: on failure or timeout send SIGTERM (and SIGKILL after 10 more
#     seconds) instead of dying
# $keepActive: handed to vm_stop_cleanup()
#
# Returns nothing if the VM is not running. Dies if the stop request failed or
# timed out and $force is not set.
sub _do_vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive) = @_;

    my $pid = check_running($vmid, $nocheck);
    return if !$pid;

    my $conf;
    if (!$nocheck) {
        $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->check_lock($conf) if !$skiplock;
        if (!defined($timeout) && $shutdown && $conf->{startup}) {
            my $opts = PVE::JSONSchema::pve_parse_startup_order($conf->{startup});
            $timeout = $opts->{down} if $opts->{down};
        }
        PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-stop');
    }

    # Polls once per second for at most $seconds and returns true if the VM is
    # still running afterwards. The state is checked again after the last
    # sleep, so a VM that exits during that final second is not mistaken for a
    # timeout (which would signal a PID that no longer belongs to the VM).
    my $still_running_after = sub {
        my ($seconds) = @_;

        for (my $waited = 0; $waited < $seconds; $waited++) {
            return 0 if !check_running($vmid, $nocheck);
            sleep 1;
        }

        return check_running($vmid, $nocheck) ? 1 : 0;
    };

    eval {
        if ($shutdown) {
            if (defined($conf) && get_qga_key($conf, 'enabled')) {
                mon_cmd($vmid, "guest-shutdown", timeout => $timeout);
            } else {
                mon_cmd($vmid, "system_powerdown");
            }
        } else {
            mon_cmd($vmid, "quit");
        }
    };
    my $err = $@;

    if (!$err) {
        $timeout = 60 if !defined($timeout);

        if (!$still_running_after->($timeout)) {
            vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
            return;
        }

        die "VM quit/powerdown failed - got timeout\n" if !$force;

        warn "VM still running - terminating now with SIGTERM\n";
        kill 15, $pid;
    } else {
        if (!check_running($vmid, $nocheck)) {
            warn "Unexpected: VM shutdown command failed, but VM not running anymore..\n";
            return;
        }

        die "VM quit/powerdown failed\n" if !$force;

        warn "VM quit/powerdown failed - terminating now with SIGTERM\n";
        kill 15, $pid;
    }

    # SIGTERM was sent - wait again before resorting to SIGKILL
    if ($still_running_after->(10)) {
        warn "VM still running - terminating now with SIGKILL\n";
        kill 9, $pid;
        sleep 1;
    }

    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
}
5988
# Stops a VM while holding its configuration lock.
#
# Note: $nocheck skips the tests that rely on the VM configuration file. This
# is needed when migrating VMs to other nodes (files already moved).
# Note: $keepActive is set in vzdump stop mode - volumes need to stay active.
#
# If $migratedfrom is set, the configuration is loaded with that value, the
# process (if any) gets SIGTERM and only the cleanup is run, without locking.
sub vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive, $migratedfrom) = @_;

    if (!$migratedfrom) {
        # a plain stop is forced unless the caller decided otherwise
        $force //= 1 if !$shutdown;

        return PVE::QemuConfig->lock_config($vmid, sub {
            _do_vm_stop($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive);
        });
    }

    my $pid = check_running($vmid, $nocheck, $migratedfrom);
    kill 15, $pid if $pid;

    my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);
    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 0);

    return;
}
6009
# Reboots a running VM: a reboot request is created and the VM is shut down
# while the configuration lock is held.
#
# $vmid: ID of the VM
# $timeout: shutdown timeout, handed to _do_vm_stop()
#
# On failure the reboot request is cleared again and the error is re-raised.
sub vm_reboot {
    my ($vmid, $timeout) = @_;

    my $reboot_locked = sub {
        eval {
            # only reboot if running, as qmeventd starts it again on a stop event
            if (check_running($vmid)) {
                create_reboot_request($vmid);

                my $storecfg = PVE::Storage::config();
                _do_vm_stop($storecfg, $vmid, undef, undef, $timeout, 1);
            }
        };
        if (my $err = $@) {
            # a leftover request would make the next normal shutdown look like a reboot
            clear_reboot_request($vmid);
            die $err;
        }
    };

    PVE::QemuConfig->lock_config($vmid, $reboot_locked);
}
6032
# Suspends a VM, either by pausing it or by saving its state to a volume.
#
# $vmid: ID of the VM
# $skiplock: do not check the lock in the VM configuration
# $includestate: if true, the state is saved to a state volume and QEMU is
#     quit afterwards; otherwise only the 'stop' monitor command is sent
# $statestorage: storage for the state volume; if not given, it is looked up
#     with find_vmstate_storage() and the permission is checked here
#
# note: if using the statestorage parameter, the caller has to check privileges
sub vm_suspend {
    my ($vmid, $skiplock, $includestate, $statestorage) = @_;

    my ($conf, $path, $storecfg, $vmstate);

    PVE::QemuConfig->lock_config($vmid, sub {
        $conf = PVE::QemuConfig->load_config($vmid);

        my $is_backing_up = PVE::QemuConfig->has_lock($conf, 'backup');
        PVE::QemuConfig->check_lock($conf) unless $skiplock || $is_backing_up;

        die "cannot suspend to disk during backup\n" if $is_backing_up && $includestate;

        if (!$includestate) {
            mon_cmd($vmid, "stop");
            return;
        }

        $conf->{lock} = 'suspending';
        my $date = strftime("%Y-%m-%d", localtime(time()));
        $storecfg = PVE::Storage::config();

        unless ($statestorage) {
            $statestorage = find_vmstate_storage($conf, $storecfg);

            # check permissions for the storage
            my $rpcenv = PVE::RPCEnvironment::get();
            if ($rpcenv->{type} ne 'cli') {
                my $authuser = $rpcenv->get_user();
                $rpcenv->check($authuser, "/storage/$statestorage", ['Datastore.AllocateSpace']);
            }
        }

        $vmstate = PVE::QemuConfig->__snapshot_save_vmstate(
            $vmid, $conf, "suspend-$date", $storecfg, $statestorage, 1);
        $path = PVE::Storage::path($storecfg, $vmstate);
        PVE::QemuConfig->write_config($vmid, $conf);
    });

    if ($includestate) {
        # save vm state
        PVE::Storage::activate_volumes($storecfg, [$vmstate]);

        eval {
            set_migration_caps($vmid, 1);
            mon_cmd($vmid, "savevm-start", statefile => $path);

            # poll once per second until the state has been written
            while (1) {
                my $state = mon_cmd($vmid, "query-savevm");
                my $status = $state->{status};

                die "savevm not active\n" if !$status;

                if ($status eq 'active') {
                    sleep(1);
                    next;
                }

                if ($status eq 'completed') {
                    print "State saved, quitting\n";
                    last;
                }

                die "query-savevm failed with error '$state->{error}'\n"
                    if $status eq 'failed' && $state->{error};

                die "query-savevm returned status '$status'\n";
            }
        };
        my $err = $@;

        PVE::QemuConfig->lock_config($vmid, sub {
            $conf = PVE::QemuConfig->load_config($vmid);

            if ($err) {
                # cleanup, but leave suspending lock, to indicate something went wrong
                eval {
                    mon_cmd($vmid, "savevm-end");
                    PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
                    PVE::Storage::vdisk_free($storecfg, $vmstate);
                    delete $conf->@{qw(vmstate runningmachine runningcpu)};
                    PVE::QemuConfig->write_config($vmid, $conf);
                };
                warn $@ if $@;
                die $err;
            }

            die "lock changed unexpectedly\n"
                unless PVE::QemuConfig->has_lock($conf, 'suspending');

            mon_cmd($vmid, "quit");
            $conf->{lock} = 'suspended';
            PVE::QemuConfig->write_config($vmid, $conf);
        });
    }
}
6127
# Resumes a VM while holding its configuration lock.
#
# $vmid: ID of the VM
# $skiplock: do not check the lock in the VM configuration
# $nocheck: do not load the VM configuration at all (implies no lock check)
#
# Depending on the status reported by 'query-status' nothing is done
# ('running'), 'system_wakeup' is sent ('suspended') or 'cont' is sent. For
# 'shutdown' a 'system_reset' is sent first.
sub vm_resume {
    my ($vmid, $skiplock, $nocheck) = @_;

    my $resume_locked = sub {
        my $res = mon_cmd($vmid, 'query-status');
        my $status = $res->{status};

        my ($resume_cmd, $reset) = ('cont', 0);
        if ($status) {
            return if $status eq 'running'; # job done, go home
            $resume_cmd = 'system_wakeup' if $status eq 'suspended';
            $reset = 1 if $status eq 'shutdown';
        }

        unless ($nocheck) {
            my $conf = PVE::QemuConfig->load_config($vmid);

            # a 'backup' lock does not prevent resuming
            unless ($skiplock || PVE::QemuConfig->has_lock($conf, 'backup')) {
                PVE::QemuConfig->check_lock($conf);
            }
        }

        # required if a VM shuts down during a backup and we get a resume
        # request before the backup finishes for example
        mon_cmd($vmid, "system_reset") if $reset;

        mon_cmd($vmid, $resume_cmd);
    };

    PVE::QemuConfig->lock_config($vmid, $resume_locked);
}
6158
# Sends a key to a VM via the human monitor while holding the config lock.
#
# $vmid: ID of the VM
# $skiplock: accepted, but not used by this function
# $key: key name, inserted verbatim into the 'sendkey' monitor command
#
# Dies with the monitor output if that output is not empty.
sub vm_sendkey {
    my ($vmid, $skiplock, $key) = @_;

    my $send_locked = sub {
        # the configuration is loaded, but its content is not used here
        my $conf = PVE::QemuConfig->load_config($vmid);

        # there is no qmp command, so we use the human monitor command
        my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "sendkey $key");
        die $output if $output ne '';
    };

    PVE::QemuConfig->lock_config($vmid, $send_locked);
}
6171
# vzdump restore implementation
6173
# Returns the name of the first entry listed by 'tar tf' for an archive.
#
# $archive: path to the archive, must be a plain file
#
# Dies if the file does not exist, if tar cannot be started or if the listing
# yields no (true) first line.
sub tar_archive_read_firstfile {
    my ($archive) = @_;

    -f $archive or die "ERROR: file '$archive' does not exist\n";

    # try to detect archive type first
    # (list-form pipe open, so the archive name never passes through a shell)
    my $pid = open(my $listing, '-|', 'tar', 'tf', $archive);
    die "unable to open file '$archive'\n" if !$pid;

    # only the first line is of interest, so tar is terminated right away
    my $first_entry = <$listing>;
    kill 15, $pid;
    close $listing;

    die "ERROR: archive contaions no data\n" if !$first_entry;
    chomp $first_entry;

    return $first_entry;
}
6191
# Removes the temporary volumes recorded in a restore stat file.
#
# $storecfg: storage configuration, used to free storage-managed volumes
# $statfile: path of the stat file; lines are expected to end in
#     'vzdump:<name>:<volid>'
#
# Volumes given as absolute paths are unlinked, all others are freed with
# PVE::Storage::vdisk_free(). Every outcome is reported on STDERR; failures
# never abort the cleanup of the remaining volumes. A stat file that cannot be
# opened is silently ignored.
sub tar_restore_cleanup {
    my ($storecfg, $statfile) = @_;

    print STDERR "starting cleanup\n";

    if (my $fd = IO::File->new($statfile, "r")) {
        while (defined(my $line = <$fd>)) {
            if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                my $volid = $2;
                eval {
                    if ($volid =~ m|^/|) {
                        # 'or' (not '||') so that the result of unlink is
                        # checked, not the truthiness of its argument
                        unlink($volid) or die "unlink failed - $!\n";
                    } else {
                        PVE::Storage::vdisk_free($storecfg, $volid);
                    }
                    print STDERR "temporary volume '$volid' sucessfuly removed\n";
                };
                print STDERR "unable to cleanup '$volid' - $@" if $@;
            } else {
                print STDERR "unable to parse line in statfile - $line";
            }
        }
        $fd->close();
    }
}
6217
# Dispatches a restore to the handler matching the archive format.
#
# $archive: path of the archive, or '-' to hand the restore straight to
#     restore_vma_archive()
# $vmid, $user: handed through to the format specific restore function
# $opts: restore options; 'format' overrides the format detected by
#     PVE::Storage::archive_info()
#
# Returns whatever the selected restore function returns.
sub restore_file_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    return restore_vma_archive($archive, $vmid, $user, $opts) if $archive eq '-';

    my $info = PVE::Storage::archive_info($archive);
    my $format = $opts->{format} // $info->{format};

    return restore_tar_archive($archive, $vmid, $user, $opts) if $format eq 'tar';

    # everything else is handled as VMA, with the detected compression
    return restore_vma_archive($archive, $vmid, $user, $opts, $info->{compression});
}
6235
# Helper to remove disks that will not be used after restore
#
# $storecfg: Storage configuration
# $vmid: ID of the VM being restored
# $oldconf: configuration of the VM that gets overwritten
# $virtdev_hash: drives contained in the backup, keyed by drive name
#
# Volumes of drives that get restored are freed. All other owned volumes are
# kept, but lose their snapshots, because the restored configuration has no
# snapshots anymore. Errors are only warned about.
my $restore_cleanup_oldconf = sub {
    my ($storecfg, $vmid, $oldconf, $virtdev_hash) = @_;

    my $kept_disks = {};

    PVE::QemuConfig->foreach_volume($oldconf, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;

        # only volumes owned by this VM are touched
        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        # Note: only delete disk we want to restore
        # other volumes will become unused
        if (!$virtdev_hash->{$ds}) {
            $kept_disks->{$volid} = 1;
            return;
        }

        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        warn $@ if $@;
    });

    # after the restore we have no snapshots anymore
    for my $snapname (keys %{$oldconf->{snapshots}}) {
        my $vmstate = $oldconf->{snapshots}->{$snapname}->{vmstate};
        if ($vmstate) {
            eval { PVE::Storage::vdisk_free($storecfg, $vmstate); };
            warn $@ if $@;
        }

        for my $volid (keys %$kept_disks) {
            eval { PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snapname); };
            warn $@ if $@;
        }
    }
};
6281
# Helper to parse vzdump backup device hints
#
# $rpcenv: Environment, used to check storage permissions
# $user: User ID, to check storage permissions
# $storecfg: Storage configuration
# $fh: the file handle for reading the configuration
# $devinfo: should contain device sizes for all backed-up devices
# $options: backup options (pool, default storage)
#
# Return: $virtdev_hash, updates $devinfo (add devname, virtdev, format, storeid)
my $parse_backup_hints = sub {
    my ($rpcenv, $user, $storecfg, $fh, $devinfo, $options) = @_;

    # dies unless images may be allocated on the storage by $user
    my $assert_storage_usable = sub {
        my ($storeid, $scfg) = @_;

        die "Content type 'images' is not available on storage '$storeid'\n"
            if !$scfg->{content}->{images};

        return if $user eq 'root@pam';
        $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace']);
    };

    my %virtdevs;
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\#qmdump\#map:(\S+):(\S+):(\S*):(\S*):$/) {
            my ($virtdev, $devname, $storeid, $format) = ($1, $2, $3, $4);

            my $dev = $devinfo->{$devname}
                or die "archive does not contain data for drive '$virtdev'\n";

            # an explicitly requested storage wins, 'local' is the last resort
            $storeid = $options->{storage} if defined($options->{storage});
            $storeid ||= 'local';
            $format ||= 'raw';

            @{$dev}{qw(devname virtdev format storeid)} = ($devname, $virtdev, $format, $storeid);

            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $assert_storage_usable->($storeid, $scfg); # permission and content type check

            $virtdevs{$virtdev} = $dev;
            next;
        }

        # cloud-init drives are not part of the archive, so they are detected
        # via their regular configuration line
        $line =~ m/^((?:ide|sata|scsi)\d+):\s*(.*)\s*$/ or next;
        my $virtdev = $1;
        my $drive = parse_drive($virtdev, $2);
        next if !drive_is_cloudinit($drive);

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        $storeid = $options->{storage} if defined ($options->{storage});
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        my $format = qemu_img_format($scfg, $volname); # has 'raw' fallback

        $assert_storage_usable->($storeid, $scfg); # permission and content type check

        $virtdevs{$virtdev} = {
            format => $format,
            storeid => $storeid,
            size => PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE,
            is_cloudinit => 1,
        };
    }

    return \%virtdevs;
};
6349
# Helper to allocate and activate all volumes required for a restore
#
# $storecfg: Storage configuration
# $virtdev_hash: as returned by parse_backup_hints()
# $vmid: ID of the VM the volumes are allocated for
#
# Each entry of $virtdev_hash is updated in place: 'format' falls back to the
# storage default if the requested one is not supported, and 'volid' is set to
# the newly allocated volume.
#
# Returns: { $virtdev => $volid }
my $restore_allocate_devices = sub {
    my ($storecfg, $virtdev_hash, $vmid) = @_;

    my $map = {};
    foreach my $virtdev (sort keys %$virtdev_hash) {
        my $d = $virtdev_hash->{$virtdev};
        # size divided by 1024, rounded up
        # (presumably bytes -> KiB as expected by vdisk_alloc - TODO confirm)
        my $alloc_size = int(($d->{size} + 1024 - 1)/1024);
        my $storeid = $d->{storeid};
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        # test if requested format is supported
        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        my $supported = grep { $_ eq $d->{format} } @$validFormats;
        $d->{format} = $defFormat if !$supported;

        # only cloud-init volumes get an explicit name; on storages with a
        # 'path' the format is appended as file extension
        my $name;
        if ($d->{is_cloudinit}) {
            $name = "vm-$vmid-cloudinit";
            $name .= ".$d->{format}" if $scfg->{path};
        }

        my $volid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $vmid, $d->{format}, $name, $alloc_size);

        print STDERR "new volume ID is '$volid'\n";
        $d->{volid} = $volid;

        PVE::Storage::activate_volumes($storecfg, [$volid]);

        $map->{$virtdev} = $volid;
    }

    return $map;
};
6393
# Converts one line of a backed-up VM configuration for the restored VM.
#
# $cookie: state shared between calls; 'netcount' numbers converted net devices
# $map: { $virtdev => $volid } of the newly allocated volumes
# $line: the configuration line, including its trailing newline
# $unique: if true, MAC addresses and the smbios1 UUID are regenerated
#
# Returns the text to write to the new configuration, which is the empty
# string for lines that are dropped.
sub restore_update_config_line {
    my ($cookie, $map, $line, $unique) = @_;

    # backup hints, locks, unused volumes and snapshot parents are dropped
    return '' if $line =~ m/^(?:\#qmdump\#|\#vzdump\#|lock:|unused\d+:|parent:)/;

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');

    if ($line =~ m/^vlan(\d+):\s*(\S+)\s*$/) {
        # try to convert old 1.X settings
        my ($ind, $ethcfg) = ($1, $2);

        my $converted = '';
        for my $devconfig (PVE::Tools::split_list($ethcfg)) {
            my ($model, $macaddr) = split(/\=/, $devconfig);
            $macaddr = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if !$macaddr || $unique;

            my $netstr = print_net({
                model => $model,
                bridge => "vmbr$ind",
                macaddr => $macaddr,
            });

            $converted .= "net$cookie->{netcount}: $netstr\n";
            $cookie->{netcount}++;
        }
        return $converted;
    }

    if ($unique && $line =~ m/^(net\d+):\s*(\S+)\s*$/) {
        my ($id, $netstr) = ($1, $2);

        my $net = parse_net($netstr);
        $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if $net->{macaddr};
        $netstr = print_net($net);

        return "$id: $netstr\n";
    }

    if ($line =~ m/^((?:ide|scsi|virtio|sata|efidisk|tpmstate)\d+):\s*(\S+)\s*$/) {
        my ($virtdev, $value) = ($1, $2);
        my $di = parse_drive($virtdev, $value);

        # drives explicitly excluded from backup are kept as a comment
        return "#$line" if defined($di->{backup}) && !$di->{backup};

        # drives without a newly allocated volume stay as they are
        return $line if !$map->{$virtdev};

        delete $di->{format}; # format can change on restore
        $di->{file} = $map->{$virtdev};
        $value = print_drive($di);

        return "$virtdev: $value\n";
    }

    if ($line =~ m/^vmgenid: (.*)/) {
        my $vmgenid = $1;
        # always generate a new vmgenid if there was a valid one setup
        $vmgenid = generate_uuid() if $vmgenid ne '0';

        return "vmgenid: $vmgenid\n";
    }

    if ($unique && $line =~ m/^(smbios1: )(.*)/) {
        my ($prefix, $value) = ($1, $2);

        my ($uuid, $uuid_str);
        UUID::generate($uuid);
        UUID::unparse($uuid, $uuid_str);

        my $smbios1 = parse_smbios1($value);
        $smbios1->{uuid} = $uuid_str;

        return $prefix . print_smbios1($smbios1) . "\n";
    }

    return $line;
}
6462
# Deactivates all volumes that were allocated for a restore.
#
# $storecfg: Storage configuration
# $virtdev_hash: restore device info; entries without a 'volid' are skipped
#
# Errors are printed to STDERR and not raised.
my $restore_deactivate_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    my @volids = grep { $_ } map { $_->{volid} } values %$virtdev_hash;

    eval { PVE::Storage::deactivate_volumes($storecfg, \@volids); };
    print STDERR $@ if $@;
};
6474
# Frees all volumes that were allocated for a restore.
#
# $storecfg: Storage configuration
# $virtdev_hash: restore device info; entries without a 'volid' are skipped
#
# Every volume is handled on its own, the outcome is reported on STDERR.
my $restore_destroy_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    for my $dev (values %$virtdev_hash) {
        my $volid = $dev->{volid};
        next if !$volid;

        eval {
            PVE::Storage::vdisk_free($storecfg, $volid);
            print STDERR "temporary volume '$volid' sucessfuly removed\n";
        };
        print STDERR "unable to cleanup '$volid' - $@" if $@;
    }
};
6487
# Parses the raw restored configuration and applies the overrides on top.
#
# $filename: handed to parse_vm_config() together with the raw text
# $backup_conf_raw: raw configuration text
# $override_conf: hash whose entries replace those of the parsed configuration
#
# Returns the merged configuration hash.
my $restore_merge_config = sub {
    my ($filename, $backup_conf_raw, $override_conf) = @_;

    my $merged = parse_vm_config($filename, $backup_conf_raw);
    $merged->{$_} = $override_conf->{$_} for keys %$override_conf;

    return $merged;
};
6498
# Lists the image volumes known to the storage layer, keyed by volume ID.
#
# $cfg: storage configuration
# $vmid: handed to PVE::Storage::vdisk_list() as VM ID argument
#
# Returns { $volid => $item }, where each item additionally gets its 'path'
# set. Items without a (true) 'volid' or 'size' are skipped.
sub scan_volids {
    my ($cfg, $vmid) = @_;

    my $info = PVE::Storage::vdisk_list($cfg, undef, $vmid, undef, 'images');

    my %by_volid;
    for my $items (values %$info) {
        for my $item (@$items) {
            next unless $item->{volid} && $item->{size};

            $item->{path} = PVE::Storage::path($cfg, $item->{volid});
            $by_volid{$item->{volid}} = $item;
        }
    }

    return \%by_volid;
}
6515
# Synchronizes the disk entries of a VM configuration with existing volumes.
#
# $vmid: ID of the VM
# $conf: VM configuration hash, modified in place
# $volid_hash: { $volid => $info } of the volumes to consider, where $info
#     provides at least 'path' and 'size'
#
# Three steps are done: sizes of used drives are updated, 'unusedX' entries
# whose volume is referenced elsewhere are removed, and volumes that are not
# referenced at all are added as unused.
#
# Returns 1 if the configuration was changed, undef otherwise.
sub update_disk_config {
    my ($vmid, $conf, $volid_hash) = @_;

    my $changes;
    my $prefix = "VM $vmid";

    # Note: it is allowed to define multiple storages with same path (alias),
    # so two different volume IDs can point to the same path. That is why both
    # the volume ID and the real path of used and unused disks are tracked.
    my %seen_volid;
    my %seen_path;

    # update size info
    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;
        my $volume = $volid_hash->{$volid};

        # mark volid as "in-use" for next step
        $seen_volid{$volid} = 1;
        if ($volume) {
            my $path = $volume->{path};
            $seen_path{$path} = 1 if $path;
        }

        return if drive_is_cdrom($drive) || !$volume;

        my ($updated, $msg) = PVE::QemuServer::Drive::update_disksize($drive, $volume->{size});
        return if !defined($updated);

        $changes = 1;
        $conf->{$opt} = print_drive($updated);
        print "$prefix ($opt): $msg\n";
    });

    # remove 'unusedX' entry if volume is used
    PVE::QemuConfig->foreach_unused_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $path = $volid_hash->{$volid} ? $volid_hash->{$volid}->{path} : undef;

        if ($seen_volid{$volid} || ($path && $seen_path{$path})) {
            print "$prefix remove entry '$opt', its volume '$volid' is in use\n";
            $changes = 1;
            delete $conf->{$opt};
        }

        # a later unused entry for the same volume is a duplicate
        $seen_volid{$volid} = 1;
        $seen_path{$path} = 1 if $path;
    });

    for my $volid (sort keys %$volid_hash) {
        next if $volid =~ m/vm-$vmid-state-/;
        next if $seen_volid{$volid};

        my $path = $volid_hash->{$volid}->{path};
        next if !$path; # just to be sure
        next if $seen_path{$path};

        $changes = 1;
        my $key = PVE::QemuConfig->add_unused_volume($conf, $volid);
        print "$prefix add unreferenced volume '$volid' as '$key' to config\n";
        $seen_path{$path} = 1; # avoid to add more than once (aliases)
    }

    return $changes;
}
6589
# Rescans the storages and updates the disk entries of one or all VMs.
#
# $vmid: ID of the VM; if undefined, all VMs returned by config_list() are done
# $nolock: if true, the configuration is updated without taking its lock
# $dryrun: if true, changes are only printed and not written
sub rescan {
    my ($vmid, $nolock, $dryrun) = @_;

    my $cfg = PVE::Storage::config();

    print "rescan volumes...\n";
    my $volid_hash = scan_volids($cfg, $vmid);

    my $update_config = sub {
        my ($id) = @_;

        my $conf = PVE::QemuConfig->load_config($id);

        PVE::QemuConfig->check_lock($conf);

        # only hand over the volumes that belong to this VM
        my %own_volumes;
        for my $volid (keys %$volid_hash) {
            my $info = $volid_hash->{$volid};
            $own_volumes{$volid} = $info if $info->{vmid} && $info->{vmid} == $id;
        }

        my $changes = update_disk_config($id, $conf, \%own_volumes);

        PVE::QemuConfig->write_config($id, $conf) if $changes && !$dryrun;
    };

    # runs the update for one VM, under the config lock unless $nolock is set
    my $run_update = sub {
        my ($id) = @_;

        return $update_config->($id) if $nolock;
        return PVE::QemuConfig->lock_config($id, $update_config, $id);
    };

    if (defined($vmid)) {
        $run_update->($vmid);
    } else {
        my $vmlist = config_list();
        for my $id (keys %$vmlist) {
            $run_update->($id);
        }
    }
}
6633
# Restores a VM from a Proxmox Backup Server snapshot.
#
# $archive: volume ID of the backup; must parse to vtype 'backup' with format
#     'pbs-vm'
# $vmid: ID of the VM to restore to
# $user: user the storage permission checks are done for
# $options: hash of restore options; the keys 'storage', 'unique', 'live',
#     'pool' and 'override_conf' are read here or by the helpers called
#
# The backed-up configuration and, if present, firewall configuration are
# fetched into a temporary directory. The volumes are allocated and filled
# with 'pbs-restore', and finally the adapted configuration is written. With
# $options->{live} only the EFI disk and the TPM state are restored up front,
# the remaining drives are handed to pbs_live_restore().
#
# On error the newly allocated volumes are destroyed and the error is
# re-raised.
sub restore_proxmox_backup_archive {
    my ($archive, $vmid, $user, $options) = @_;

    my $storecfg = PVE::Storage::config();

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($archive);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

    my $fingerprint = $scfg->{fingerprint};
    my $keyfile = PVE::Storage::PBSPlugin::pbs_encryption_key_file_name($storecfg, $storeid);

    my $repo = PVE::PBSClient::get_repository($scfg);
    my $namespace = $scfg->{namespace};

    # This is only used for `pbs-restore` and the QEMU PBS driver (live-restore)
    my $password = PVE::Storage::PBSPlugin::pbs_get_password($scfg, $storeid);
    local $ENV{PBS_PASSWORD} = $password;
    local $ENV{PBS_FINGERPRINT} = $fingerprint if defined($fingerprint);

    my ($vtype, $pbs_backup_name, undef, undef, undef, undef, $format) =
        PVE::Storage::parse_volname($storecfg, $archive);

    die "got unexpected vtype '$vtype'\n" if $vtype ne 'backup';

    die "got unexpected backup format '$format'\n" if $format ne 'pbs-vm';

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;
    mkpath $tmpdir;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    # Note: $oldconf is undef if VM does not exists
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my $rpcenv = PVE::RPCEnvironment::get();
    my $devinfo = {}; # info about drives included in backup
    my $virtdev_hash = {}; # info about allocated drives

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        my $cfgfn = "$tmpdir/qemu-server.conf";
        my $firewall_config_fn = "$tmpdir/fw.conf";
        my $index_fn = "$tmpdir/index.json";

        my $cmd = "restore";

        my $param = [$pbs_backup_name, "index.json", $index_fn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);
        my $index = PVE::Tools::file_get_contents($index_fn);
        $index = decode_json($index);

        foreach my $info (@{$index->{files}}) {
            # the dots are escaped, so only real '.img.fidx' archives match
            if ($info->{filename} =~ m/^(drive-\S+)\.img\.fidx$/) {
                my $devname = $1;
                if ($info->{size} =~ m/^(\d+)$/) { # untaint size
                    $devinfo->{$devname}->{size} = $1;
                } else {
                    die "unable to parse file size in 'index.json' - got '$info->{size}'\n";
                }
            }
        }

        my $is_qemu_server_backup = scalar(
            grep { $_->{filename} eq 'qemu-server.conf.blob' } @{$index->{files}}
        );
        if (!$is_qemu_server_backup) {
            die "backup does not look like a qemu-server backup (missing 'qemu-server.conf' file)\n";
        }
        my $has_firewall_config = scalar(grep { $_->{filename} eq 'fw.conf.blob' } @{$index->{files}});

        $param = [$pbs_backup_name, "qemu-server.conf", $cfgfn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

        if ($has_firewall_config) {
            $param = [$pbs_backup_name, "fw.conf", $firewall_config_fn];
            PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($firewall_config_fn, "${pve_firewall_dir}/$vmid.fw");
        }

        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $storecfg, $fh, $devinfo, $options);

        # fixme: rate limit?

        # create empty/temp config
        PVE::Tools::file_set_contents($conffile, "memory: 128\nlock: create");

        $restore_cleanup_oldconf->($storecfg, $vmid, $oldconf, $virtdev_hash) if $oldconf;

        # allocate volumes
        my $map = $restore_allocate_devices->($storecfg, $virtdev_hash, $vmid);

        # the namespace argument is the same for every drive
        my @ns_arg = defined($namespace) ? ('--ns', $namespace) : ();

        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            # this fails if storage is unavailable
            my $volid = $d->{volid};
            my $path = PVE::Storage::path($storecfg, $volid);

            # for live-restore we only want to preload the efidisk and TPM state
            next if $options->{live} && $virtdev ne 'efidisk0' && $virtdev ne 'tpmstate0';

            my $pbs_restore_cmd = [
                '/usr/bin/pbs-restore',
                '--repository', $repo,
                @ns_arg,
                $pbs_backup_name,
                "$d->{devname}.img.fidx",
                $path,
                '--verbose',
            ];

            push @$pbs_restore_cmd, '--format', $d->{format} if $d->{format};
            push @$pbs_restore_cmd, '--keyfile', $keyfile if -e $keyfile;

            if (PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $volid)) {
                push @$pbs_restore_cmd, '--skip-zero';
            }

            my $dbg_cmdstring = PVE::Tools::cmd2string($pbs_restore_cmd);
            print "restore proxmox backup image: $dbg_cmdstring\n";
            run_command($pbs_restore_cmd);
        }

        # read the configuration a second time to build the new one
        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $options->{unique},
            );
        }

        $fh->close();
    };
    my $err = $@;

    # for a successful live-restore the volumes have to stay active
    if ($err || !$options->{live}) {
        $restore_deactivate_volumes->($storecfg, $virtdev_hash);
    }

    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($storecfg, $virtdev_hash);
        die $err;
    }

    if ($options->{live}) {
        # keep lock during live-restore
        $new_conf_raw .= "\nlock: create";
    }

    my $new_conf = $restore_merge_config->($conffile, $new_conf_raw, $options->{override_conf});
    PVE::QemuConfig->write_config($vmid, $new_conf);

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $options->{pool}) if $options->{pool};

    if ($options->{live}) {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "got signal ($!) - abort\n"; };

        my $conf = PVE::QemuConfig->load_config($vmid);
        die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf);

        # these special drives are already restored before start
        delete $devinfo->{'drive-efidisk0'};
        delete $devinfo->{'drive-tpmstate0-backup'};

        my $pbs_opts = {
            repo => $repo,
            keyfile => $keyfile,
            snapshot => $pbs_backup_name,
            namespace => $namespace,
        };
        pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $pbs_opts);

        PVE::QemuConfig->remove_lock($vmid, "create");
    }
}
6849
# Starts a VM on top of a PBS snapshot and streams the data to its disks.
#
# $vmid: ID of the VM
# $conf: VM configuration hash
# $storecfg: storage configuration
# $restored_disks: hash keyed by device name ('drive-<confname>') of the drives
#     to stream
# $opts: hash with 'repo', 'snapshot', 'keyfile' and optionally 'namespace'
#
# The VM is started paused with the snapshot as backing image of every drive,
# then one 'block-stream' job per drive is started and the VM is resumed. Once
# all jobs are done the PBS block nodes are removed.
#
# Dies with "live-restore failed" after stopping the VM if anything goes
# wrong, and with a descriptive message if a device name has an unexpected
# format.
sub pbs_live_restore {
    my ($vmid, $conf, $storecfg, $restored_disks, $opts) = @_;

    print "starting VM for live-restore\n";
    print "repository: '$opts->{repo}', snapshot: '$opts->{snapshot}'\n";

    my $pbs_backing = {};
    for my $ds (keys %$restored_disks) {
        # never continue with a stale capture from an earlier match
        my ($confname) = $ds =~ m/^drive-(.*)$/
            or die "unexpected device name '$ds' - unable to derive the drive name\n";

        $pbs_backing->{$confname} = {
            repository => $opts->{repo},
            snapshot => $opts->{snapshot},
            archive => "$ds.img.fidx",
        };
        $pbs_backing->{$confname}->{keyfile} = $opts->{keyfile} if -e $opts->{keyfile};
        $pbs_backing->{$confname}->{namespace} = $opts->{namespace} if defined($opts->{namespace});

        my $drive = parse_drive($confname, $conf->{$confname});
        print "restoring '$ds' to '$drive->{file}'\n";
    }

    eval {
        # make sure HA doesn't interrupt our restore by stopping the VM
        if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
            run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
        }

        # start VM with backing chain pointing to PBS backup, environment vars for PBS driver
        # in QEMU (PBS_PASSWORD and PBS_FINGERPRINT) are already set by our caller
        vm_start_nolock($storecfg, $vmid, $conf, {paused => 1, 'pbs-backing' => $pbs_backing}, {});

        my $qmeventd_fd = register_qmeventd_handle($vmid);

        # begin streaming, i.e. data copy from PBS to target disk for every vol,
        # this will effectively collapse the backing image chain consisting of
        # [target <- alloc-track -> PBS snapshot] to just [target] (alloc-track
        # removes itself once all backing images vanish with 'auto-remove=on')
        my $jobs = {};
        for my $ds (sort keys %$restored_disks) {
            my $job_id = "restore-$ds";
            mon_cmd($vmid, 'block-stream',
                'job-id' => $job_id,
                device => "$ds",
            );
            $jobs->{$job_id} = {};
        }

        mon_cmd($vmid, 'cont');
        qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');

        print "restore-drive jobs finished successfully, removing all tracking block devices"
            ." to disconnect from Proxmox Backup Server\n";

        for my $ds (sort keys %$restored_disks) {
            mon_cmd($vmid, 'blockdev-del', 'node-name' => "$ds-pbs");
        }

        close($qmeventd_fd);
    };

    my $err = $@;

    if ($err) {
        warn "An error occurred during live-restore: $err\n";
        # forced stop: skiplock and nocheck set, 10 seconds timeout
        _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
        die "live-restore failed\n";
    }
}
6920
# Restore a VM from a VMA archive.
#
# Parameters:
#   $archive - path of the VMA archive, or '-' to read the archive from STDIN
#   $vmid    - ID of the VM that gets restored
#   $user    - user handed to the backup-hint parser
#   $opts    - option hash; this function itself reads 'bwlimit', 'unique',
#              'override_conf' and 'pool' (the hash is also passed on to the
#              backup-hint parser)
#   $comp    - compression identifier; when set, the matching decompressor is
#              put in front of 'vma extract'
#
# The archive is piped through an optional 'cstream' rate limiter and an
# optional decompressor into 'vma extract'. As soon as 'vma' reports the
# 'CTIME:' line, the already extracted config is read, the target volumes are
# allocated and the device mapping is written to a FIFO that 'vma' reads from.
#
# Dies if the pipeline fails; volumes allocated so far are destroyed in that
# case. The temporary directory and the FIFO are removed in every case.
sub restore_vma_archive {
    my ($archive, $vmid, $user, $opts, $comp) = @_;

    my $readfrom = $archive;

    my $cfg = PVE::Storage::config();
    my $commands = [];
    my $bwlimit = $opts->{bwlimit};

    # appends a command to the pipeline; every later stage reads from STDIN
    my $dbg_cmdstring = '';
    my $add_pipe = sub {
        my ($cmd) = @_;
        push @$commands, $cmd;
        $dbg_cmdstring .= ' | ' if length($dbg_cmdstring);
        $dbg_cmdstring .= PVE::Tools::cmd2string($cmd);
        $readfrom = '-';
    };

    my $input = undef;
    if ($archive eq '-') {
        $input = '<&STDIN';
    } else {
        # If we use a backup from a PVE defined storage we also consider that
        # storage's rate limit:
        my (undef, $volid) = PVE::Storage::path_to_volume_id($cfg, $archive);
        if (defined($volid)) {
            my ($sid, undef) = PVE::Storage::parse_volume_id($volid);
            my $readlimit = PVE::Storage::get_bandwidth_limit('restore', [$sid], $bwlimit);
            if ($readlimit) {
                print STDERR "applying read rate limit: $readlimit\n";
                my $cstream = ['cstream', '-t', $readlimit*1024, '--', $readfrom];
                $add_pipe->($cstream);
            }
        }
    }

    if ($comp) {
        my $info = PVE::Storage::decompressor_info('vma', $comp);
        my $cmd = $info->{decompressor};
        push @$cmd, $readfrom;
        $add_pipe->($cmd);
    }

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { warn "got interrupt - ignored\n"; };

    my $mapfifo = "/var/tmp/vzdumptmp$$.fifo";
    if (!POSIX::mkfifo($mapfifo, 0600)) {
        # remember the reason before the file test below can overwrite $!
        my $mkfifo_err = "$!";
        # a FIFO that is already present at this path is still usable, anything
        # else would make the open() below create a regular file instead
        die "unable to create fifo '$mapfifo' - $mkfifo_err\n" if ! -p $mapfifo;
    }
    my $fifofh;
    my $openfifo = sub { open($fifofh, '>', $mapfifo) or die $! };

    $add_pipe->(['vma', 'extract', '-v', '-r', $mapfifo, $readfrom, $tmpdir]);

    my $oldtimeout;
    my $timeout = 5;

    my $devinfo = {}; # info about drives included in backup
    my $virtdev_hash = {}; # info about allocated drives

    my $rpcenv = PVE::RPCEnvironment::get();

    my $conffile = PVE::QemuConfig->config_file($vmid);

    # Note: $oldconf is undef if VM does not exist
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my %storage_limits;

    # reads the extracted config, allocates the target volumes, writes the
    # device map to the FIFO and collects the updated config in $new_conf_raw
    my $print_devmap = sub {
        my $cfgfn = "$tmpdir/qemu-server.conf";

        # we can read the config - that is already extracted
        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        my $fwcfgfn = "$tmpdir/qemu-server.fw";
        if (-f $fwcfgfn) {
            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($fwcfgfn, "${pve_firewall_dir}/$vmid.fw");
        }

        $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $cfg, $fh, $devinfo, $opts);

        foreach my $info (values %{$virtdev_hash}) {
            my $storeid = $info->{storeid};
            next if defined($storage_limits{$storeid});

            my $limit = PVE::Storage::get_bandwidth_limit('restore', [$storeid], $bwlimit) // 0;
            print STDERR "rate limit for storage $storeid: $limit KiB/s\n" if $limit;
            $storage_limits{$storeid} = $limit * 1024;
        }

        foreach my $devname (keys %$devinfo) {
            die "found no device mapping information for device '$devname'\n"
                if !$devinfo->{$devname}->{virtdev};
        }

        # create empty/temp config
        if ($oldconf) {
            PVE::Tools::file_set_contents($conffile, "memory: 128\n");
            $restore_cleanup_oldconf->($cfg, $vmid, $oldconf, $virtdev_hash);
        }

        # allocate volumes
        my $map = $restore_allocate_devices->($cfg, $virtdev_hash, $vmid);

        # print restore information to $fifofh
        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            my $storeid = $d->{storeid};
            my $volid = $d->{volid};

            my $map_opts = '';
            if (my $limit = $storage_limits{$storeid}) {
                $map_opts .= "throttling.bps=$limit:throttling.group=$storeid:";
            }

            my $write_zeros = 1;
            if (PVE::Storage::volume_has_feature($cfg, 'sparseinit', $volid)) {
                $write_zeros = 0;
            }

            my $path = PVE::Storage::path($cfg, $volid);

            print $fifofh "${map_opts}format=$d->{format}:${write_zeros}:$d->{devname}=$path\n";

            print "map '$d->{devname}' to '$path' (write zeros = ${write_zeros})\n";
        }

        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $fh->close();
    };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };
        local $SIG{ALRM} = sub { die "got timeout\n"; };

        $oldtimeout = alarm($timeout);

        # handles the output of the pipeline line by line
        my $parser = sub {
            my $line = shift;

            print "$line\n";

            if ($line =~ m/^DEV:\sdev_id=(\d+)\ssize:\s(\d+)\sdevname:\s(\S+)$/) {
                my ($dev_id, $size, $devname) = ($1, $2, $3);
                $devinfo->{$devname} = { size => $size, dev_id => $dev_id };
            } elsif ($line =~ m/^CTIME: /) {
                # we correctly received the vma config, so we can disable
                # the timeout now for disk allocation (set to 10 minutes, so
                # that we always timeout if something goes wrong)
                alarm(600);
                $print_devmap->();
                print $fifofh "done\n";
                my $tmp = $oldtimeout || 0;
                $oldtimeout = undef;
                alarm($tmp);
                close($fifofh);
                $fifofh = undef;
            }
        };

        print "restore vma archive: $dbg_cmdstring\n";
        run_command($commands, input => $input, outfunc => $parser, afterfork => $openfifo);
    };
    my $err = $@;

    alarm($oldtimeout) if $oldtimeout;

    $restore_deactivate_volumes->($cfg, $virtdev_hash);

    close($fifofh) if $fifofh;
    unlink $mapfifo;
    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($cfg, $virtdev_hash);
        die $err;
    }

    my $new_conf = $restore_merge_config->($conffile, $new_conf_raw, $opts->{override_conf});
    PVE::QemuConfig->write_config($vmid, $new_conf);

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $opts->{pool}) if $opts->{pool};
}
7136
# Restore a VM from a (legacy) tar based vzdump archive.
#
# Parameters:
#   $archive - path of the tar archive, or '-' to read it from STDIN
#   $vmid    - ID of the VM that gets restored
#   $user    - exported to the extraction helper as VZDUMP_USER
#   $opts    - option hash; reads 'override_conf', 'storage', 'pool',
#              'prealloc', 'info' and 'unique'
#
# The archive is unpacked with 'tar --to-command=qmextract'. The helper writes
# the new volume mapping to a stat file inside a temporary directory, which is
# then used to rewrite the config contained in the archive.
#
# Dies if override options are passed, if the archive does not start with
# 'qemu-server.conf' or if the extraction fails. The temporary directory is
# removed on success as well as on failure.
sub restore_tar_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if (scalar(keys $opts->{override_conf}->%*) > 0) {
        my $keystring = join(' ', keys $opts->{override_conf}->%*);
        die "cannot pass along options ($keystring) when restoring from tar archive\n";
    }

    if ($archive ne '-') {
        my $firstfile = tar_archive_read_firstfile($archive);
        die "ERROR: file '$archive' does not look like a QemuServer vzdump backup\n"
            if $firstfile ne 'qemu-server.conf';
    }

    my $storecfg = PVE::Storage::config();

    # avoid zombie disks when restoring over an existing VM -> cleanup first
    # pass keep_empty_config=1 to keep the config (thus VMID) reserved for us
    # skiplock=1 because qmrestore has set the 'create' lock itself already
    my $vmcfgfn = PVE::QemuConfig->config_file($vmid);
    destroy_vm($storecfg, $vmid, 1, { lock => 'restore' }) if -f $vmcfgfn;

    my $tocmd = "/usr/lib/qemu-server/qmextract";

    $tocmd .= " --storage " . PVE::Tools::shellquote($opts->{storage}) if $opts->{storage};
    $tocmd .= " --pool " . PVE::Tools::shellquote($opts->{pool}) if $opts->{pool};
    $tocmd .= ' --prealloc' if $opts->{prealloc};
    $tocmd .= ' --info' if $opts->{info};

    # tar option "xf" does not autodetect compression when read from STDIN,
    # so we pipe to zcat
    my $cmd = "zcat -f|tar xf " . PVE::Tools::shellquote($archive) . " " .
        PVE::Tools::shellquote("--to-command=$tocmd");

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    mkpath $tmpdir;

    local $ENV{VZDUMP_TMPDIR} = $tmpdir;
    local $ENV{VZDUMP_VMID} = $vmid;
    local $ENV{VZDUMP_USER} = $user;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    my $new_conf_raw = '';

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        if ($archive eq '-') {
            print "extracting archive from STDIN\n";
            run_command($cmd, input => "<&STDIN");
        } else {
            print "extracting archive '$archive'\n";
            run_command($cmd);
        }

        # NOTE: this only leaves the surrounding eval block
        return if $opts->{info};

        # read new mapping
        my $map = {};
        my $statfile = "$tmpdir/qmrestore.stat";
        if (my $fd = IO::File->new($statfile, "r")) {
            while (defined (my $line = <$fd>)) {
                if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                    $map->{$1} = $2 if $1;
                } else {
                    print STDERR "unable to parse line in statfile - $line\n";
                }
            }
            $fd->close();
        }

        my $confsrc = "$tmpdir/qemu-server.conf";

        my $srcfd = IO::File->new($confsrc, "r") || die "unable to open file '$confsrc'\n";

        my $cookie = { netcount => 0 };
        while (defined (my $line = <$srcfd>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $srcfd->close();
    };
    if (my $err = $@) {
        tar_restore_cleanup($storecfg, "$tmpdir/qmrestore.stat") if !$opts->{info};
        # the stat file is not needed anymore after the cleanup above, so do
        # not leave the temporary directory behind on failure
        rmtree $tmpdir;
        die $err;
    }

    rmtree $tmpdir;

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;
}
7249
# Calls $func once for every storage ID that is referenced by a non-CD-ROM
# volume of $conf. The storage IDs are passed in sorted order and each one is
# passed only once.
sub foreach_storage_used_by_vm {
    my ($conf, $func) = @_;

    my %used_storage;

    my $remember_storage = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive);

        # noerr=1: volumes that are no valid volume IDs are silently skipped
        my ($sid, $volname) = PVE::Storage::parse_volume_id($drive->{file}, 1);
        $used_storage{$sid} = $sid if $sid;
    };
    PVE::QemuConfig->foreach_volume($conf, $remember_storage);

    for my $storeid (sort keys %used_storage) {
        $func->($storeid);
    }
}
7269
# storage types whose snapshots are handled by QEMU itself
my $qemu_snap_storage = {
    rbd => 1,
};

# Returns 1 if snapshots of $volid have to be done by QEMU, and nothing
# (empty list / undef) otherwise.
#
# This is the case for volumes on a storage type listed in $qemu_snap_storage
# that does not have 'krbd' set, and for volumes whose volume ID ends in
# '.qcow2' or '.qed'. A device ID matching 'tpmstate0' never qualifies.
#
# Dies if the storage of $volid is not part of $storecfg.
sub do_snapshots_with_qemu {
    my ($storecfg, $volid, $deviceid) = @_;

    return if $deviceid =~ m/tpmstate0/;

    my $storage_name = PVE::Storage::parse_volume_id($volid);
    my $scfg = $storecfg->{ids}->{$storage_name}
        // die "could not find storage '$storage_name'\n";

    my $qemu_managed_storage = $qemu_snap_storage->{$scfg->{type}} && !$scfg->{krbd};
    return 1 if $qemu_managed_storage;

    return 1 if $volid =~ m/\.(qcow2|qed)$/;

    return;
}
7292
# Pings the guest agent of VM $vmid with a timeout of 3 (passed as the
# 'timeout' parameter of the 'guest-ping' command).
#
# Returns 1 if the ping succeeded and 0 otherwise. A failure is reported with
# a warning unless $nowarn is set.
sub qga_check_running {
    my ($vmid, $nowarn) = @_;

    eval { mon_cmd($vmid, "guest-ping", timeout => 3); };
    my $ping_error = $@;

    return 1 if !$ping_error;

    warn "Qemu Guest Agent is not running - $ping_error" if !$nowarn;
    return 0;
}
7303
# Converts the volumes of VM $vmid into base volumes.
#
# CD-ROM drives and volumes whose storage lacks the 'template' feature are
# skipped. If $disk is set, only the drive with that name is converted. The
# config is written after each converted volume.
sub template_create {
    my ($vmid, $conf, $disk) = @_;

    my $storecfg = PVE::Storage::config();

    my $convert_to_base = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);
        return if $disk && $ds ne $disk;

        my $source_volid = $drive->{file};
        return if !PVE::Storage::volume_has_feature($storecfg, 'template', $source_volid);

        my $base_volid = PVE::Storage::vdisk_create_base($storecfg, $source_volid);
        $drive->{file} = $base_volid;
        $conf->{$ds} = print_drive($drive);
        PVE::QemuConfig->write_config($vmid, $conf);
    };

    PVE::QemuConfig->foreach_volume($conf, $convert_to_base);
}
7324
# Translates a path of the form 'iscsi://<portal>/<target>/<lun>' into the
# comma separated option string built below (file.driver=iscsi, ...,
# driver=raw), using the initiator name returned by get_initiator_name().
#
# Dies if $path does not have that form.
sub convert_iscsi_path {
    my ($path) = @_;

    my ($portal, $target, $lun) = $path =~ m|^iscsi://([^/]+)/([^/]+)/(.+)$|
        or die "cannot convert iscsi path '$path', unkown format\n";

    my $initiator_name = get_initiator_name();

    my @image_opts = (
        "file.driver=iscsi",
        "file.transport=tcp",
        "file.initiator-name=$initiator_name",
        "file.portal=$portal",
        "file.target=$target",
        "file.lun=$lun",
        "driver=raw",
    );

    return join(',', @image_opts);
}
7341
# Copies the content of $src_volid to the already allocated volume $dst_volid
# using 'qemu-img convert'.
#
# Parameters:
#   $src_volid           - source volume ID, or a path to a file/block device
#   $dst_volid           - destination volume ID (must be a valid volume ID)
#   $size                - size used to render the progress output
#   $snapname            - optional snapshot of the source to copy from
#   $is_zero_initialized - if set, the target is wrapped in 'zeroinit:'
#                          (not done for iSCSI targets)
#
# Dies if source or destination are invalid or if the copy fails.
sub qemu_img_convert {
    my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized) = @_;

    my $storecfg = PVE::Storage::config();
    my ($src_storeid, $src_volname) = PVE::Storage::parse_volume_id($src_volid, 1);
    my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid, 1);

    die "destination '$dst_volid' is not a valid volid form qemu-img convert\n" if !$dst_storeid;

    my ($src_cache_mode, $src_path, $src_format);
    my $src_is_iscsi = 0;

    if ($src_storeid) {
        # source is a volume on a PVE storage
        PVE::Storage::activate_volumes($storecfg, [$src_volid], $snapname);
        my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
        $src_format = qemu_img_format($src_scfg, $src_volname);
        $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
        $src_is_iscsi = ($src_path =~ m|^iscsi://|);
        $src_cache_mode = 'none' if $src_scfg->{type} eq 'zfspool';
    } elsif (-f $src_volid || -b $src_volid) {
        # source is a plain file or block device, guess the format from the suffix
        $src_path = $src_volid;
        ($src_format) = $src_path =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/;
    }

    die "source '$src_volid' is not a valid volid nor path for qemu-img convert\n" if !$src_path;

    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
    my $dst_format = qemu_img_format($dst_scfg, $dst_volname);
    my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
    my $dst_is_iscsi = ($dst_path =~ m|^iscsi://|);

    my @convert_cmd = ('/usr/bin/qemu-img', 'convert', '-p', '-n');

    if ($snapname && $src_format && $src_format eq "qcow2") {
        push @convert_cmd, '-l', "snapshot.name=$snapname";
    }
    if ($dst_scfg->{type} eq 'zfspool') {
        push @convert_cmd, '-t', 'none';
    }
    if (defined($src_cache_mode)) {
        push @convert_cmd, '-T', $src_cache_mode;
    }

    if ($src_is_iscsi) {
        push @convert_cmd, '--image-opts';
        $src_path = convert_iscsi_path($src_path);
    } elsif ($src_format) {
        push @convert_cmd, '-f', $src_format;
    }

    if ($dst_is_iscsi) {
        push @convert_cmd, '--target-image-opts';
        $dst_path = convert_iscsi_path($dst_path);
    } else {
        push @convert_cmd, '-O', $dst_format;
    }

    my $target = (!$dst_is_iscsi && $is_zero_initialized) ? "zeroinit:$dst_path" : $dst_path;
    push @convert_cmd, $src_path, $target;

    # turns the percentage reported by qemu-img into a human readable line
    my $print_progress = sub {
        my $line = shift;
        return if $line !~ m/\((\S+)\/100\%\)/;

        my $percent = $1;
        my $transferred = int($size * $percent / 100);
        my $total_h = render_bytes($size, 1);
        my $transferred_h = render_bytes($transferred, 1);

        print "transferred $transferred_h of $total_h ($percent%)\n";
    };

    eval { run_command(\@convert_cmd, timeout => undef, outfunc => $print_progress); };
    my $err = $@;
    die "copy failed: $err" if $err;
}
7423
# Returns the image format to use for $volname on the storage described by
# $scfg: the file suffix of $volname if the storage has a 'path' set and the
# suffix matches $PVE::QemuServer::Drive::QEMU_FORMAT_RE, "raw" otherwise.
sub qemu_img_format {
    my ($scfg, $volname) = @_;

    return "raw" if !$scfg->{path};

    return $volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/ ? $1 : "raw";
}
7433
# Starts a 'drive-mirror' block job for drive $drive of VM $vmid and then
# hands over to qemu_drive_mirror_monitor().
#
# Parameters:
#   $dst_volid           - target volume ID, or an 'nbd:' URI
#   $vmiddst             - ID of the target VM, passed on to the monitor
#   $is_zero_initialized - if set, a volume target is wrapped in 'zeroinit:'
#   $jobs                - hash of block jobs, the new job is registered in it
#   $completion, $qga    - passed on to qemu_drive_mirror_monitor()
#   $bwlimit             - optional limit, multiplied by 1024 for the 'speed' option
#   $src_bitmap          - optional dirty bitmap, switches to incremental sync
#
# Dies with 'mirroring error: ...' if the job cannot be started; all jobs in
# $jobs are cancelled in that case.
sub qemu_drive_mirror {
    my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit, $src_bitmap) = @_;

    $jobs ||= {};

    my $device = "drive-$drive";
    $jobs->{$device} = {};

    my ($qemu_target, $format);
    if ($dst_volid =~ /^nbd:/) {
        ($qemu_target, $format) = ($dst_volid, "nbd");
    } else {
        my $storecfg = PVE::Storage::config();
        my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid);

        my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

        $format = qemu_img_format($dst_scfg, $dst_volname);

        my $dst_path = PVE::Storage::path($storecfg, $dst_volid);

        if ($is_zero_initialized) {
            $qemu_target = "zeroinit:$dst_path";
        } else {
            $qemu_target = $dst_path;
        }
    }

    my %mirror_args = (
        timeout => 10,
        device => $device,
        mode => "existing",
        sync => "full",
        target => $qemu_target,
    );
    $mirror_args{format} = $format if $format;

    if (defined($src_bitmap)) {
        @mirror_args{qw(sync bitmap)} = ('incremental', $src_bitmap);
        print "drive mirror re-using dirty bitmap '$src_bitmap'\n";
    }

    if (!defined($bwlimit)) {
        print "drive mirror is starting for drive-$drive\n";
    } else {
        $mirror_args{speed} = $bwlimit * 1024;
        print "drive mirror is starting for drive-$drive with bandwidth limit: ${bwlimit} KB/s\n";
    }

    # if a job already runs for this device we get an error, catch it for cleanup
    eval { mon_cmd($vmid, "drive-mirror", %mirror_args); };
    if (my $start_err = $@) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@;
        die "mirroring error: $start_err\n";
    }

    qemu_drive_mirror_monitor ($vmid, $vmiddst, $jobs, $completion, $qga);
}
7485
7486 # $completion can be either
7487 # 'complete': wait until all jobs are ready, block-job-complete them (default)
7488 # 'cancel': wait until all jobs are ready, block-job-cancel them
7489 # 'skip': wait until all jobs are ready, return with block jobs in ready state
7490 # 'auto': wait until all jobs disappear, only use for jobs which complete automatically
# Monitors the block jobs listed in $jobs until they are finished or ready and
# finalizes them according to $completion.
#
# Parameters:
#   $vmid       - VM the block jobs run in
#   $vmiddst    - target VM ID; if set and different from $vmid, the jobs are
#                 cancelled once ready (the disk is not switched over)
#   $jobs       - hash keyed by job ID; finished jobs are removed from it
#   $completion - 'complete' (default), 'cancel', 'skip' or 'auto'
#   $qga        - if set and the guest agent responds, the guest filesystems
#                 are frozen instead of suspending the VM while cancelling
#   $op         - block job type to look at, defaults to "mirror"
#
# Dies with "block job ($op) error: ..." on failure, after trying to cancel
# all jobs that are still listed in $jobs.
sub qemu_drive_mirror_monitor {
    my ($vmid, $vmiddst, $jobs, $completion, $qga, $op) = @_;

    $completion //= 'complete';
    $op //= "mirror";

    eval {
        my $err_complete = 0;

        my $starttime = time ();
        while (1) {
            die "block job ('$op') timed out\n" if $err_complete > 300;

            my $stats = mon_cmd($vmid, "query-block-jobs");
            my $ctime = time();

            # only consider jobs of the requested type
            my $running_jobs = {};
            for my $stat (@$stats) {
                next if $stat->{type} ne $op;
                $running_jobs->{$stat->{device}} = $stat;
            }

            my $readycounter = 0;

            for my $job_id (sort keys %$jobs) {
                my $job = $running_jobs->{$job_id};

                my $vanished = !defined($job);
                my $complete = defined($jobs->{$job_id}->{complete}) && $vanished;
                if ($complete || ($vanished && $completion eq 'auto')) {
                    print "$job_id: $op-job finished\n";
                    delete $jobs->{$job_id};
                    next;
                }

                die "$job_id: '$op' has been cancelled\n" if !defined($job);

                my $busy = $job->{busy};
                my $ready = $job->{ready};
                if (my $total = $job->{len}) {
                    my $transferred = $job->{offset} || 0;
                    my $remaining = $total - $transferred;
                    my $percent = sprintf "%.2f", ($transferred * 100 / $total);

                    my $duration = $ctime - $starttime;
                    my $total_h = render_bytes($total, 1);
                    my $transferred_h = render_bytes($transferred, 1);

                    my $status = sprintf(
                        "transferred $transferred_h of $total_h ($percent%%) in %s",
                        render_duration($duration),
                    );

                    if ($ready) {
                        if ($busy) {
                            $status .= ", still busy"; # shouldn't even happen? but mirror is weird
                        } else {
                            $status .= ", ready";
                        }
                    }
                    # stop repeating the status line once the job was seen ready
                    print "$job_id: $status\n" if !$jobs->{$job_id}->{ready};
                    $jobs->{$job_id}->{ready} = $ready;
                }

                $readycounter++ if $job->{ready};
            }

            last if scalar(keys %$jobs) == 0;

            if ($readycounter == scalar(keys %$jobs)) {
                print "all '$op' jobs are ready\n";

                # do the complete later (or has already been done)
                last if $completion eq 'skip' || $completion eq 'auto';

                if ($vmiddst && $vmiddst != $vmid) {
                    my $agent_running = $qga && qga_check_running($vmid);
                    if ($agent_running) {
                        print "freeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-freeze"); };
                        warn $@ if $@;
                    } else {
                        print "suspend vm\n";
                        eval { PVE::QemuServer::vm_suspend($vmid, 1); };
                        warn $@ if $@;
                    }

                    # if we clone a disk for a new target vm, we don't switch the disk
                    PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs);

                    if ($agent_running) {
                        print "unfreeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-thaw"); };
                        warn $@ if $@;
                    } else {
                        print "resume vm\n";
                        eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
                        warn $@ if $@;
                    }

                    last;
                } else {

                    for my $job_id (sort keys %$jobs) {
                        # try to switch the disk if source and destination are on the same guest
                        print "$job_id: Completing block job_id...\n";

                        # use a distinct name, $op is the job type of the outer scope
                        my $qmp_cmd;
                        if ($completion eq 'complete') {
                            $qmp_cmd = 'block-job-complete';
                        } elsif ($completion eq 'cancel') {
                            $qmp_cmd = 'block-job-cancel';
                        } else {
                            die "invalid completion value: $completion\n";
                        }
                        eval { mon_cmd($vmid, $qmp_cmd, device => $job_id) };
                        my $complete_err = $@;
                        if ($complete_err && $complete_err =~ m/cannot be completed/) {
                            print "$job_id: block job cannot be completed, trying again.\n";
                            $err_complete++;
                        } elsif ($complete_err) {
                            # any other failure must not be reported as success
                            chomp $complete_err;
                            die "$job_id: block job cannot be completed - $complete_err\n";
                        } else {
                            print "$job_id: Completed successfully.\n";
                            $jobs->{$job_id}->{complete} = 1;
                        }
                    }
                }
            }
            sleep 1;
        }
    };
    my $err = $@;

    if ($err) {
        # best effort, the original error is the one that gets reported
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        die "block job ($op) error: $err";
    }
}
7627
# Cancels all block jobs listed in $jobs and waits until none of them is
# reported by 'query-block-jobs' anymore. Jobs that are gone are removed from
# $jobs, so the hash is empty when this function returns.
#
# Errors of the 'block-job-cancel' command itself are ignored.
sub qemu_blockjobs_cancel {
    my ($vmid, $jobs) = @_;

    for my $job_id (keys %$jobs) {
        print "$job_id: Cancelling block job\n";
        eval { mon_cmd($vmid, "block-job-cancel", device => $job_id); };
        $jobs->{$job_id}->{cancel} = 1;
    }

    while (1) {
        my $stats = mon_cmd($vmid, "query-block-jobs");

        my %still_running;
        for my $stat (@$stats) {
            $still_running{$stat->{device}} = $stat;
        }

        for my $job_id (keys %$jobs) {
            my $cancel_requested = defined($jobs->{$job_id}->{cancel});
            next if !$cancel_requested || defined($still_running{$job_id});

            print "$job_id: Done.\n";
            delete $jobs->{$job_id};
        }

        last if scalar(keys %$jobs) == 0;

        sleep 1;
    }
}
7658
# Clones a single drive, either as linked clone or as full copy.
#
# Parameters:
#   $storecfg   - storage configuration
#   $source     - hash with 'vmid', 'running', 'drivename', 'drive' and 'snapname'
#   $dest       - hash with 'vmid', 'drivename', 'efisize', 'storage' and 'format'
#   $full       - create a full copy instead of a linked clone
#   $newvollist - array reference, newly allocated volume IDs are appended
#   $jobs, $completion, $qga, $bwlimit - passed on to the drive-mirror helpers
#
# A full clone of a running VM (without snapshot) is done via drive-mirror,
# otherwise the data is copied with 'qemu-img'. Cloud-init drives are only
# re-allocated, no data is copied for them.
#
# Returns a copy of the source drive hash pointing to the new volume.
sub clone_disk {
    my ($storecfg, $source, $dest, $full, $newvollist, $jobs, $completion, $qga, $bwlimit) = @_;

    my ($vmid, $running) = $source->@{qw(vmid running)};
    my ($src_drivename, $drive, $snapname) = $source->@{qw(drivename drive snapname)};

    my ($newvmid, $dst_drivename, $efisize) = $dest->@{qw(vmid drivename efisize)};
    my ($storage, $format) = $dest->@{qw(storage format)};

    # the destination drive name is optional, use a defined stand-in for the
    # string comparisons below to avoid 'uninitialized value' warnings
    my $dst_name = $dst_drivename // '';

    my $use_drive_mirror = $full && $running && $src_drivename && !$snapname;

    if ($src_drivename && $dst_drivename && $src_drivename ne $dst_drivename) {
        die "cloning from/to EFI disk requires EFI disk\n"
            if $src_drivename eq 'efidisk0' || $dst_drivename eq 'efidisk0';
        die "cloning from/to TPM state requires TPM state\n"
            if $src_drivename eq 'tpmstate0' || $dst_drivename eq 'tpmstate0';

        # This would lead to two device nodes in QEMU pointing to the same backing image!
        die "cannot change drive name when cloning disk from/to the same VM\n"
            if $use_drive_mirror && $vmid == $newvmid;
    }

    die "cannot move TPM state while VM is running\n"
        if $use_drive_mirror && $src_drivename eq 'tpmstate0';

    my $newvolid;

    print "create " . ($full ? 'full' : 'linked') . " clone of drive ";
    print "$src_drivename " if $src_drivename;
    print "($drive->{file})\n";

    if (!$full) {
        $newvolid = PVE::Storage::vdisk_clone($storecfg, $drive->{file}, $newvmid, $snapname);
        push @$newvollist, $newvolid;
    } else {

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        $storeid = $storage if $storage;

        my $dst_format = resolve_dst_disk_format($storecfg, $storeid, $volname, $format);

        my $name = undef;
        my $size = undef;
        if (drive_is_cloudinit($drive)) {
            $name = "vm-$newvmid-cloudinit";
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            if ($scfg->{path}) {
                $name .= ".$dst_format";
            }
            $snapname = undef;
            $size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE;
        } elsif ($dst_name eq 'efidisk0') {
            $size = $efisize or die "internal error - need to specify EFI disk size\n";
        } elsif ($dst_name eq 'tpmstate0') {
            $dst_format = 'raw';
            $size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
        } else {
            ($size) = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
        }
        $newvolid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $newvmid, $dst_format, $name, ($size/1024)
        );
        push @$newvollist, $newvolid;

        PVE::Storage::activate_volumes($storecfg, [$newvolid]);

        if (drive_is_cloudinit($drive)) {
            # when cloning multiple disks (e.g. during clone_vm) it might be the last disk
            # if this is the case, we have to complete any block-jobs still there from
            # previous drive-mirrors
            if (defined($completion) && $completion eq 'complete' && scalar(keys %$jobs) > 0) {
                qemu_drive_mirror_monitor($vmid, $newvmid, $jobs, $completion, $qga);
            }
            goto no_data_clone;
        }

        my $sparseinit = PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $newvolid);
        if ($use_drive_mirror) {
            qemu_drive_mirror($vmid, $src_drivename, $newvolid, $newvmid, $sparseinit, $jobs,
                $completion, $qga, $bwlimit);
        } else {
            # TODO: handle bwlimits
            if ($dst_name eq 'efidisk0') {
                # the relevant data on the efidisk may be smaller than the source
                # e.g. on RBD/ZFS, so we use dd to copy only the amount
                # that is given by the OVMF_VARS.fd
                my $src_path = PVE::Storage::path($storecfg, $drive->{file}, $snapname);
                my $dst_path = PVE::Storage::path($storecfg, $newvolid);

                my $src_format = (PVE::Storage::parse_volname($storecfg, $drive->{file}))[6];

                # better for Ceph if block size is not too small, see bug #3324
                my $bs = 1024*1024;

                my $cmd = ['qemu-img', 'dd', '-n', '-O', $dst_format];

                if ($src_format eq 'qcow2' && $snapname) {
                    die "cannot clone qcow2 EFI disk snapshot - requires QEMU >= 6.2\n"
                        if !min_version(kvm_user_version(), 6, 2);
                    push $cmd->@*, '-l', $snapname;
                }
                push $cmd->@*, "bs=$bs", "osize=$size", "if=$src_path", "of=$dst_path";
                run_command($cmd);
            } else {
                qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit);
            }
        }
    }

no_data_clone:
    # the size is optional in the result, so a failure to query it is ignored
    my ($size) = eval { PVE::Storage::volume_size_info($storecfg, $newvolid, 10) };

    my $disk = dclone($drive);
    delete $disk->{format};
    $disk->{file} = $newvolid;
    $disk->{size} = $size if defined($size);

    return $disk;
}
7778
# Returns the version of the QEMU process running VM $vmid as
# "<major>.<minor>", as reported by the 'query-version' command.
sub get_running_qemu_version {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, "query-version");

    return join('.', $res->{qemu}->{major}, $res->{qemu}->{minor});
}
7784
# Decides whether the old (non-EFI) PXE BIOS files have to be used for the
# given machine type.
#
# Returns nothing if no machine type is given, otherwise the list
# ($use_old_bios_files, $machine_type), where a trailing '.pxe' has been
# stripped from the returned machine type.
sub qemu_use_old_bios_files {
    my ($machine_type) = @_;

    return if !$machine_type;

    # an explicit '.pxe' suffix always selects the old files
    if ($machine_type =~ m/^(\S+)\.pxe$/) {
        my $plain_machine_type = $1;
        return (1, $plain_machine_type);
    }

    my $version = extract_version($machine_type, kvm_user_version());
    # Note: kvm version < 2.4 use non-efi pxe files, and have problems when we
    # load new efi bios files on migration. So this hack is required to allow
    # live migration from qemu-2.2 to qemu-2.4, which is sometimes used when
    # upgrading from proxmox-ve-3.X to proxmox-ve 4.0
    my $needs_old_files = !min_version($version, 2, 4);

    return ($needs_old_files, $machine_type);
}
7806
# Returns the size in bytes of the OVMF vars image that matches the VM config.
#
# $efidisk is the parsed EFI disk; if it is not passed, it is parsed from
# 'efidisk0' in $conf (when present). Dies if the vars image does not exist.
sub get_efivars_size {
    my ($conf, $efidisk) = @_;

    my $arch = get_vm_arch($conf);

    if (!defined($efidisk)) {
        $efidisk = $conf->{efidisk0} ? parse_drive('efidisk0', $conf->{efidisk0}) : undef;
    }

    my $smm = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my (undef, $vars_image) = get_ovmf_files($arch, $efidisk, $smm);

    die "uefi vars image '$vars_image' not found\n" if ! -f $vars_image;

    return -s $vars_image;
}
7817
# Sets the 'size' of the 'efidisk0' entry in $conf to the size of the matching
# EFI vars image. Does nothing if the config has no 'efidisk0'.
sub update_efidisk_size {
    my ($conf) = @_;

    if (defined($conf->{efidisk0})) {
        my $efidisk = PVE::QemuServer::parse_drive('efidisk0', $conf->{efidisk0});
        $efidisk->{size} = get_efivars_size($conf);
        $conf->{efidisk0} = print_drive($efidisk);
    }

    return;
}
7829
# Sets the 'size' of the 'tpmstate0' entry in $conf to
# PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE and stores the re-printed drive
# string back into the config.
sub update_tpmstate_size {
    my ($conf) = @_;

    my $tpmstate = PVE::QemuServer::parse_drive('tpmstate0', $conf->{tpmstate0});
    $tpmstate->{size} = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;

    $conf->{tpmstate0} = print_drive($tpmstate);
}
7837
# Allocates a new EFI disk on storage $storeid and fills it with the default
# EFI vars image that get_ovmf_files() selects for $arch, $efidisk and $smm.
#
# Returns the list ($volid, $size_in_kib), where the size is the size reported
# for the new volume divided by 1024. Dies if the vars image does not exist.
sub create_efidisk($$$$$$$) {
    my ($storecfg, $storeid, $vmid, $fmt, $arch, $efidisk, $smm) = @_;

    my (undef, $vars_image) = get_ovmf_files($arch, $efidisk, $smm);
    die "EFI vars default image not found\n" if ! -f $vars_image;

    my $vars_bytes = -s $vars_image;
    my $vars_kib = PVE::Tools::convert_size($vars_bytes, 'b' => 'kb');

    my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $fmt, undef, $vars_kib);
    PVE::Storage::activate_volumes($storecfg, [$volid]);

    # copy the template vars into the fresh volume
    qemu_img_convert($vars_image, $volid, $vars_bytes, undef, 0);

    my ($allocated_bytes) = PVE::Storage::volume_size_info($storecfg, $volid, 3);

    return ($volid, $allocated_bytes/1024);
}
7854
# Queries the IO threads of VM $vmid and returns a hash reference that maps
# each IO thread 'id' to its 'thread-id'.
sub vm_iothreads_list {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, 'query-iothreads');

    my %thread_id_of;
    for my $entry (@$res) {
        my $id = $entry->{id};
        $thread_id_of{$id} = $entry->{"thread-id"};
    }

    return \%thread_id_of;
}
7867
# Returns the list ($maxdev, $controller, $controller_prefix) for a SCSI drive:
#   $maxdev            - 7 if 'scsihw' is unset or starts with 'lsi', 1 for
#                        'virtio-scsi-single', 256 otherwise
#   $controller        - integer part of the drive index divided by $maxdev
#   $controller_prefix - "virtioscsi" for 'virtio-scsi-single', "scsihw" otherwise
sub scsihw_infos {
    my ($conf, $drive) = @_;

    my $scsihw = $conf->{scsihw};
    my $is_single = $scsihw && $scsihw eq 'virtio-scsi-single';

    my $maxdev;
    if (!$scsihw || $scsihw =~ m/^lsi/) {
        $maxdev = 7;
    } else {
        $maxdev = $is_single ? 1 : 256;
    }

    my $controller = int($drive->{index} / $maxdev);
    my $controller_prefix = $is_single ? "virtioscsi" : "scsihw";

    return ($maxdev, $controller, $controller_prefix);
}
7888
# Maps an OS type to a Windows version number:
#   'wxp', 'w2k3', 'w2k' -> 5
#   'w2k8', 'wvista'     -> 6
#   'win<N>'             -> <N>
# Returns 0 for everything else, including an unset OS type.
sub windows_version {
    my ($ostype) = @_;

    return 0 if !$ostype;

    my %legacy_version = (
        wxp => 5,
        w2k3 => 5,
        w2k => 5,
        w2k8 => 6,
        wvista => 6,
    );

    return $legacy_version{$ostype} if exists($legacy_version{$ostype});

    if ($ostype =~ m/^win(\d+)$/) {
        my $number = $1;
        return $number;
    }

    return 0;
}
7906
# Determines the disk format to use on the target storage $storeid.
#
# If $format is not given, the format of $src_volname (as determined by
# qemu_img_format() with the target storage config) is used as hint; without a
# source volume name the storage default is returned. A format that is not in
# the list of valid formats of the storage is replaced by the storage default.
sub resolve_dst_disk_format {
    my ($storecfg, $storeid, $src_volname, $format) = @_;
    my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);

    if (!$format) {
        return $defFormat if !$src_volname;

        # if no target format is specified, use the source disk format as hint
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        $format = qemu_img_format($scfg, $src_volname);
    }

    # test if requested format is supported - else use default
    for my $valid_format (@$validFormats) {
        return $format if $valid_format eq $format;
    }

    return $defFormat;
}
7926
# NOTE: if this logic changes, please update docs & possibly gui logic
#
# Pick the storage for VM state. Order of preference:
#  1. 'vmstatestorage' from the config, if set
#  2. a shared storage used by the VM
#  3. a non-shared storage used by the VM
#  4. the storage named 'local'
# Within 2. and 3., a storage whose config has 'path' set replaces an earlier
# candidate.
sub find_vmstate_storage {
    my ($conf, $storecfg) = @_;

    my $configured = $conf->{vmstatestorage};
    return $configured if $configured;

    # best candidate seen so far, per kind ('shared' / 'local')
    my %candidate;

    foreach_storage_used_by_vm($conf, sub {
        my ($sid) = @_;
        my $scfg = PVE::Storage::storage_config($storecfg, $sid);
        my $kind = $scfg->{shared} ? 'shared' : 'local';
        # prefer file based storage
        $candidate{$kind} = $sid if !$candidate{$kind} || $scfg->{path};
    });

    return $candidate{shared} // $candidate{local} // 'local';
}
7950
# Create a new UUID with the UUID module and return its string form as
# produced by UUID::unparse.
sub generate_uuid {
    my $raw;
    UUID::generate($raw);

    my $text;
    UUID::unparse($raw, $text);

    return $text;
}
7957
# Return a freshly generated UUID prefixed with "uuid=".
sub generate_smbios1_uuid {
    my $uuid = generate_uuid();
    return "uuid=" . $uuid;
}
7961
# Send the 'nbd-server-stop' monitor command to VM $vmid and pass on its result.
sub nbd_stop {
    my ($vmid) = @_;

    return mon_cmd($vmid, 'nbd-server-stop');
}
7967
# Create (or truncate) the empty trigger file /run/qemu-server/<vmid>.reboot.
# Dies if the file cannot be opened for writing.
sub create_reboot_request {
    my ($vmid) = @_;

    my $trigger = "/run/qemu-server/$vmid.reboot";

    open(my $fh, '>', $trigger)
        or die "failed to create reboot trigger file: $!\n";
    close($fh);
}
7974
# Remove the trigger file /run/qemu-server/<vmid>.reboot.
#
# Returns the result of unlink (number of files removed). A missing file is not
# an error; any other unlink failure is fatal.
sub clear_reboot_request {
    my ($vmid) = @_;

    my $removed = unlink("/run/qemu-server/$vmid.reboot");

    if (!$removed && $! != POSIX::ENOENT) {
        die "could not remove reboot request for $vmid: $!";
    }

    return $removed;
}
7986
# Build a { device name => boot index } hash from the legacy boot string
# ($bootcfg->{legacy}, or the default from $boot_fmt when that is not set).
#
# Each character of the legacy string gets a base index of 100, 200, ... by
# position. Within one character's class, the index grows by one per device:
#  - 'd': every volume for which drive_is_cdrom($drive, 1) is true
#  - 'c': only the volume named by $conf->{bootdisk} is entered, but the index
#         still advances for every other non-cdrom volume
#  - 'n': every configured netN device
sub bootorder_from_legacy {
    my ($conf, $bootcfg) = @_;

    my $legacy = $bootcfg->{legacy} || $boot_fmt->{legacy}->{default};

    # next free boot index per legacy letter
    my %next_index;
    my $position = 0;
    for my $letter (split(//, $legacy)) {
        $position++;
        $next_index{$letter} = $position * 100;
    }

    my %order;

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        if (drive_is_cdrom($drive, 1)) {
            if ($next_index{d}) {
                $order{$ds} = $next_index{d}++;
            }
        } elsif ($next_index{c}) {
            my $is_bootdisk = $conf->{bootdisk} && $conf->{bootdisk} eq $ds;
            $order{$ds} = $next_index{c} if $is_bootdisk;
            $next_index{c} += 1;
        }
    });

    if ($next_index{n}) {
        for my $n (0 .. $MAX_NETS - 1) {
            my $netname = "net$n";
            next if !$conf->{$netname};
            $order{$netname} = $next_index{n}++;
        }
    }

    return \%order;
}
8026
# Generate default device list for 'boot: order=' property. Matches legacy
# default boot order, but with explicit device names. This is important, since
# the fallback for when neither 'order' nor the old format is specified relies
# on 'bootorder_from_legacy' above, and it would be confusing if this diverges.
#
# Returns an array reference holding, in this order and only where present:
# the first hard disk, the first cdrom and the first configured netN device.
sub get_default_bootdevices {
    my ($conf) = @_;

    my $devices = [];

    # harddisk (0) first, then cdrom (1)
    for my $want_cdrom (0, 1) {
        my $first = PVE::QemuServer::Drive::resolve_first_disk($conf, $want_cdrom);
        push @$devices, $first if $first;
    }

    # network: only the lowest-numbered configured device
    my ($first_net) = grep { $conf->{$_} } map { "net$_" } 0 .. $MAX_NETS - 1;
    push @$devices, $first_net if defined($first_net);

    return $devices;
}
8054
# Return a { device name => boot index } hash for the VM config.
#
# Without a 'boot' property, or when the parsed property carries 'legacy', the
# result comes from bootorder_from_legacy. With 'order', the listed devices are
# numbered consecutively starting at 100. Otherwise the hash is empty.
sub device_bootorder {
    my ($conf) = @_;

    return bootorder_from_legacy($conf) if !defined($conf->{boot});

    my $boot = parse_property_string($boot_fmt, $conf->{boot});

    return bootorder_from_legacy($conf, $boot)
        if !defined($boot) || $boot->{legacy};

    my %order;
    if ($boot->{order}) {
        # start at 100 to allow user to insert devices before us with -args
        my $index = 100;
        for my $dev (PVE::Tools::split_list($boot->{order})) {
            $order{$dev} = $index;
            $index++;
        }
    }

    return \%order;
}
8074
# Connect to the qmeventd socket and send the vzdump handshake for $vmid.
#
# The connect is retried with a 25ms pause while it fails with EINTR or EAGAIN,
# and gives up after the fifth failed attempt. Any other connect error is fatal
# immediately. Returns the socket handle; the caller is expected to close it
# once the inhibit is no longer required.
sub register_qmeventd_handle {
    my ($vmid) = @_;

    my $socket_path = "/var/run/qmeventd.sock";
    my $sock;
    my $attempts = 0;

    while (1) {
        $attempts++;
        $sock = IO::Socket::UNIX->new(Peer => $socket_path, Blocking => 0, Timeout => 1);
        last if $sock;

        # only transient errors are worth a retry
        die "unable to connect to qmeventd socket (vmid: $vmid) - $!\n"
            if $! != EINTR && $! != EAGAIN;

        die "unable to connect to qmeventd socket (vmid: $vmid) - timeout "
            . "after $attempts retries\n"
            if $attempts > 4;

        usleep(25000);
    }

    # send handshake to mark VM as backing up
    my $handshake = to_json({vzdump => {vmid => "$vmid"}});
    print $sock $handshake;

    # return handle to be closed later when inhibit is no longer required
    return $sock;
}
8102
8103 # bash completion helper
8104
# Completion helper: list volume IDs of backup archives.
#
# If $cvalue starts with "<storeid>:", only that storage is queried. Only items
# whose format is "vma." followed by a known compressor extension are returned.
sub complete_backup_archives {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    # undef unless the current value already names a storage
    my ($storeid) = $cvalue =~ m/^([^:]+):/;

    my $data = PVE::Storage::template_list($cfg, $storeid, 'backup');

    my @volids;
    for my $id (keys %$data) {
        for my $item (@{$data->{$id}}) {
            next unless $item->{format} =~ m/^vma\.(${\PVE::Storage::Plugin::COMPRESSOR_RE})$/;
            next unless defined($item->{volid});
            push @volids, $item->{volid};
        }
    }

    return \@volids;
}
8128
# Completion helper: list VM IDs known to vmstatus().
#
# $running undefined: all IDs.
# $running true:      non-template VMs whose status is 'running'.
# $running false:     non-template VMs whose status is not 'running'.
my $complete_vmid_full = sub {
    my ($running) = @_;

    my $vms = vmstatus();

    return [ keys %$vms ] if !defined($running);

    my @matching;
    for my $vmid (keys %$vms) {
        my $info = $vms->{$vmid};
        next if $info->{template};

        my $is_running = $info->{status} eq 'running';
        push @matching, $vmid if $running ? $is_running : !$is_running;
    }

    return \@matching;
};
8148
# Completion helper: all VM IDs, regardless of state.
sub complete_vmid {
    return $complete_vmid_full->();
}
8152
# Completion helper: IDs of non-template VMs that are not running.
sub complete_vmid_stopped {
    return $complete_vmid_full->(0);
}
8156
# Completion helper: IDs of non-template VMs that are running.
sub complete_vmid_running {
    return $complete_vmid_full->(1);
}
8160
# Completion helper: IDs of storages that pass storage_check_enabled (no node
# given) and have the 'images' content type configured.
sub complete_storage {
    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @usable = grep {
        PVE::Storage::storage_check_enabled($cfg, $_, undef, 1)
            && $ids->{$_}->{content}->{images}
    } keys %$ids;

    return \@usable;
}
8175
# Completion helper: IDs of storages that pass storage_check_enabled for the
# target node and have the 'images' content type configured.
#
# The target node is taken from the second element of $all_args.
# NOTE(review): presumably that is the target-node argument of the migrate
# command line - confirm against the CLI definition.
sub complete_migration_storage {
    my ($cmd, $param, $current_value, $all_args) = @_;

    # plain element access instead of the one-element slice @$all_args[1]; this
    # also yields undef rather than dying when no argument list was passed
    my $targetnode = $all_args->[1];

    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my $res = [];
    foreach my $sid (keys %$ids) {
        next if !PVE::Storage::storage_check_enabled($cfg, $sid, $targetnode, 1);
        next if !$ids->{$sid}->{content}->{images};
        push @$res, $sid;
    }

    return $res;
}
8193
# Check whether VM $vmid reports the status "paused" via 'query-status'.
#
# Errors from the config check or the monitor command are only warned about;
# in that case the result is false.
sub vm_is_paused {
    my ($vmid) = @_;

    my $status = eval {
        PVE::QemuConfig::assert_config_exists_on_node($vmid);
        mon_cmd($vmid, "query-status");
    };
    if (my $err = $@) {
        warn "$err\n";
    }

    return $status && $status->{status} eq "paused";
}
8203
# Verify that the storage holding volume $vol has the volume's content type
# enabled. Returns 1 on success, dies otherwise.
sub check_volume_storage_type {
    my ($storecfg, $vol) = @_;

    my ($storeid) = PVE::Storage::parse_volume_id($vol);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
    my ($vtype) = PVE::Storage::parse_volname($storecfg, $vol);

    if (!$scfg->{content}->{$vtype}) {
        die "storage '$storeid' does not support content-type '$vtype'\n";
    }

    return 1;
}
8216
8217 1;