]> git.proxmox.com Git - qemu-server.git/blob - PVE/QemuServer.pm
bump version to 8.2.1
[qemu-server.git] / PVE / QemuServer.pm
1 package PVE::QemuServer;
2
3 use strict;
4 use warnings;
5
6 use Cwd 'abs_path';
7 use Digest::SHA;
8 use Fcntl ':flock';
9 use Fcntl;
10 use File::Basename;
11 use File::Copy qw(copy);
12 use File::Path;
13 use File::stat;
14 use Getopt::Long;
15 use IO::Dir;
16 use IO::File;
17 use IO::Handle;
18 use IO::Select;
19 use IO::Socket::UNIX;
20 use IPC::Open3;
21 use JSON;
22 use List::Util qw(first);
23 use MIME::Base64;
24 use POSIX;
25 use Storable qw(dclone);
26 use Time::HiRes qw(gettimeofday usleep);
27 use URI::Escape;
28 use UUID;
29
30 use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file);
31 use PVE::CGroup;
32 use PVE::CpuSet;
33 use PVE::DataCenterConfig;
34 use PVE::Exception qw(raise raise_param_exc);
35 use PVE::Format qw(render_duration render_bytes);
36 use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
37 use PVE::INotify;
38 use PVE::JSONSchema qw(get_standard_option parse_property_string);
39 use PVE::ProcFSTools;
40 use PVE::PBSClient;
41 use PVE::RESTEnvironment qw(log_warn);
42 use PVE::RPCEnvironment;
43 use PVE::Storage;
44 use PVE::SysFSTools;
45 use PVE::Systemd;
46 use PVE::Tools qw(run_command file_read_firstline file_get_contents dir_glob_foreach get_host_arch $IPV6RE);
47
48 use PVE::QMPClient;
49 use PVE::QemuConfig;
50 use PVE::QemuServer::Helpers qw(min_version config_aware_timeout windows_version);
51 use PVE::QemuServer::Cloudinit;
52 use PVE::QemuServer::CGroup;
53 use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options);
54 use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom drive_is_read_only parse_drive print_drive);
55 use PVE::QemuServer::Machine;
56 use PVE::QemuServer::Memory;
57 use PVE::QemuServer::Monitor qw(mon_cmd);
58 use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
59 use PVE::QemuServer::USB qw(parse_usb_device);
60
# Optional SDN support: $have_sdn becomes 1 when PVE::Network::SDN::Zones can be
# loaded and stays undefined otherwise; a failing load is silently ignored.
my $have_sdn = eval {
    require PVE::Network::SDN::Zones;
    1;
};
66
# Base directory of the EDK2 firmware images. It ends in a slash and the paths below
# add another one, so every resulting path contains a double slash.
my $EDK2_FW_BASE = '/usr/share/pve-edk2-firmware/';

# Builds one [ CODE image path, VARS image path ] pair located below $EDK2_FW_BASE.
my $edk2_image_pair = sub {
    my ($code_file, $vars_file) = @_;
    return [ "$EDK2_FW_BASE/$code_file", "$EDK2_FW_BASE/$vars_file" ];
};

# Firmware image pairs, keyed by guest architecture and then by firmware variant.
my $OVMF = {
    x86_64 => {
        '4m-no-smm' => $edk2_image_pair->('OVMF_CODE_4M.fd', 'OVMF_VARS_4M.fd'),
        '4m-no-smm-ms' => $edk2_image_pair->('OVMF_CODE_4M.fd', 'OVMF_VARS_4M.ms.fd'),
        '4m' => $edk2_image_pair->('OVMF_CODE_4M.secboot.fd', 'OVMF_VARS_4M.fd'),
        '4m-ms' => $edk2_image_pair->('OVMF_CODE_4M.secboot.fd', 'OVMF_VARS_4M.ms.fd'),
        default => $edk2_image_pair->('OVMF_CODE.fd', 'OVMF_VARS.fd'),
    },
    aarch64 => {
        default => $edk2_image_pair->('AAVMF_CODE.fd', 'AAVMF_VARS.fd'),
    },
};
98
99 my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
100
101 # Note about locking: we use flock on the config file to protect against concurrent actions.
102 # Additionally, we have a 'lock' setting in the config file. This can be set to 'migrate',
103 # 'backup', 'snapshot' or 'rollback'. Most actions are not allowed when such a lock is set.
104 # But you can ignore this kind of lock with the --skiplock flag.
105
# Hook the VM config parser and writer of this module up for the '/qemu-server/' path.
cfs_register_file('/qemu-server/', \&parse_vm_config, \&write_vm_config);
111
# Standard option: location some commands save/restore VM state from.
PVE::JSONSchema::register_standard_option(
    'pve-qm-stateuri',
    {
        type => 'string',
        description => "Some command save/restore state from this location.",
        maxLength => 128,
        optional => 1,
    },
);

# Standard option: QEMU machine type name, restricted by the pattern below.
PVE::JSONSchema::register_standard_option(
    'pve-qemu-machine',
    {
        type => 'string',
        description => "Specifies the QEMU machine type.",
        pattern => '(pc|pc(-i440fx)?-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|q35|pc-q35-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|virt(?:-\d+(\.\d+)+)?(\+pve\d+)?)',
        maxLength => 40,
        optional => 1,
    },
);
126
# FIXME: remove in favor of just using the INotify one, it's cached there exactly the same way
my $nodename_cache;

# Returns the node name. PVE::INotify::nodename() is only asked while no defined
# value has been cached yet; afterwards the cached value is returned.
sub nodename {
    return $nodename_cache if defined($nodename_cache);

    $nodename_cache = PVE::INotify::nodename();
    return $nodename_cache;
}
133
# Property-string schema for the virtual watchdog: the emulated model (default key)
# and the action to run when the guest stops polling it.
my $watchdog_fmt = {
    model => {
        type => 'string',
        description => "Watchdog type to emulate.",
        enum => [ 'i6300esb', 'ib700' ],
        default => 'i6300esb',
        default_key => 1,
        optional => 1,
    },
    action => {
        type => 'string',
        description => "The action to perform if after activation the guest fails to poll the watchdog in time.",
        enum => [ 'reset', 'shutdown', 'poweroff', 'pause', 'debug', 'none' ],
        optional => 1,
    },
};
150 PVE::JSONSchema::register_format('pve-qm-watchdog', $watchdog_fmt);
151
# Property-string schema for the guest agent option; 'enabled' is the default key.
my $agent_fmt = {
    enabled => {
        type => 'boolean',
        description => "Enable/disable communication with a QEMU Guest Agent (QGA) running in the VM.",
        default_key => 1,
        default => 0,
    },
    fstrim_cloned_disks => {
        type => 'boolean',
        description => "Run fstrim after moving a disk or migrating the VM.",
        default => 0,
        optional => 1,
    },
    type => {
        type => 'string',
        description => "Select the agent type",
        enum => [ 'virtio', 'isa' ],
        default => 'virtio',
        optional => 1,
    },
};
173
# Property-string schema for the VGA option: display type (default key) and memory size.
my $vga_fmt = {
    type => {
        type => 'string',
        description => "Select the VGA type.",
        enum => [
            'cirrus',
            'qxl',
            (map { "qxl$_" } 2 .. 4),
            'none',
            (map { "serial$_" } 0 .. 3),
            'std',
            'virtio',
            'virtio-gl',
            'vmware',
        ],
        default => 'std',
        default_key => 1,
        optional => 1,
    },
    memory => {
        type => 'integer',
        description => "Sets the VGA memory (in MiB). Has no effect with serial display.",
        minimum => 4,
        maximum => 512,
        optional => 1,
    },
};
191
# Property-string schema for the inter-VM shared memory option: mandatory size plus
# an optional name for the backing file.
my $ivshmem_fmt = {
    size => {
        type => 'integer',
        description => "The size of the file in MB.",
        minimum => 1,
    },
    name => {
        type => 'string',
        format_description => 'string',
        description => "The name of the file. Will be prefixed with 'pve-shm-'. Default is the VMID. Will be deleted when the VM is stopped.",
        pattern => '[a-zA-Z0-9\-]+',
        optional => 1,
    },
};
206
# Property-string schema for an audio device: the emulated device and its backend driver.
my $audio_fmt = {
    device => {
        type => 'string',
        description => "Configure an audio device.",
        enum => [ 'ich9-intel-hda', 'intel-hda', 'AC97' ],
    },
    driver => {
        type => 'string',
        description => "Driver backend for the audio device.",
        enum => [ qw(spice none) ],
        default => 'spice',
        optional => 1,
    },
};
221
# Property-string schema for the optional SPICE enhancements.
my $spice_enhancements_fmt = {
    foldersharing => {
        type => 'boolean',
        description => "Enable folder sharing via SPICE. Needs Spice-WebDAV daemon installed in the VM.",
        default => '0',
        optional => 1,
    },
    videostreaming => {
        type => 'string',
        description => "Enable video streaming. Uses compression for detected video streams.",
        enum => [ qw(off all filter) ],
        default => 'off',
        optional => 1,
    },
};
237
# Property-string schema for the random number generator option: the host entropy
# source (default key) and the rate limit given as max_bytes per period.
my $rng_fmt = {
    source => {
        type => 'string',
        enum => [ map { "/dev/$_" } qw(urandom random hwrng) ],
        default_key => 1,
        description => join(
            ' ',
            "The file on the host to gather entropy from. In most cases '/dev/urandom'",
            "should be preferred over '/dev/random' to avoid entropy-starvation issues on the",
            "host. Using urandom does *not* decrease security in any meaningful way, as it's",
            "still seeded from real entropy, and the bytes provided will most likely be mixed",
            "with real entropy on the guest as well. '/dev/hwrng' can be used to pass through",
            "a hardware RNG from the host.",
        ),
    },
    max_bytes => {
        type => 'integer',
        description => join(
            ' ',
            "Maximum bytes of entropy allowed to get injected into the guest every",
            "'period' milliseconds. Prefer a lower value when using '/dev/random' as source. Use",
            "`0` to disable limiting (potentially dangerous!).",
        ),
        optional => 1,

        # default is 1 KiB/s, provides enough entropy to the guest to avoid boot-starvation issues
        # (e.g. systemd etc...) while allowing no chance of overwhelming the host, provided we're
        # reading from /dev/urandom
        default => 1024,
    },
    period => {
        type => 'integer',
        description => join(
            ' ',
            "Every 'period' milliseconds the entropy-injection quota is reset, allowing",
            "the guest to retrieve another 'max_bytes' of entropy.",
        ),
        optional => 1,
        default => 1000,
    },
};
270
# Property-string schema for the guest meta information: creation time and the QEMU
# version at creation time.
my $meta_info_fmt = {
    ctime => {
        type => 'integer',
        description => "The guest creation timestamp as UNIX epoch time",
        minimum => 0,
        optional => 1,
    },
    'creation-qemu' => {
        type => 'string',
        description => "The QEMU (machine) version from the time this VM was created.",
        pattern => '\d+(\.\d+)+',
        optional => 1,
    },
};
285
286 my $confdesc = {
287 onboot => {
288 optional => 1,
289 type => 'boolean',
290 description => "Specifies whether a VM will be started during system bootup.",
291 default => 0,
292 },
293 autostart => {
294 optional => 1,
295 type => 'boolean',
296 description => "Automatic restart after crash (currently ignored).",
297 default => 0,
298 },
299 hotplug => {
300 optional => 1,
301 type => 'string', format => 'pve-hotplug-features',
302 description => "Selectively enable hotplug features. This is a comma separated list of"
303 ." hotplug features: 'network', 'disk', 'cpu', 'memory', 'usb' and 'cloudinit'. Use '0' to disable"
304 ." hotplug completely. Using '1' as value is an alias for the default `network,disk,usb`."
305 ." USB hotplugging is possible for guests with machine version >= 7.1 and ostype l26 or"
306 ." windows > 7.",
307 default => 'network,disk,usb',
308 },
309 reboot => {
310 optional => 1,
311 type => 'boolean',
312 description => "Allow reboot. If set to '0' the VM exit on reboot.",
313 default => 1,
314 },
315 lock => {
316 optional => 1,
317 type => 'string',
318 description => "Lock/unlock the VM.",
319 enum => [qw(backup clone create migrate rollback snapshot snapshot-delete suspending suspended)],
320 },
321 cpulimit => {
322 optional => 1,
323 type => 'number',
324 description => "Limit of CPU usage.",
325 verbose_description => "Limit of CPU usage.\n\nNOTE: If the computer has 2 CPUs, it has"
326 ." total of '2' CPU time. Value '0' indicates no CPU limit.",
327 minimum => 0,
328 maximum => 128,
329 default => 0,
330 },
331 cpuunits => {
332 optional => 1,
333 type => 'integer',
334 description => "CPU weight for a VM, will be clamped to [1, 10000] in cgroup v2.",
335 verbose_description => "CPU weight for a VM. Argument is used in the kernel fair scheduler."
336 ." The larger the number is, the more CPU time this VM gets. Number is relative to"
337 ." weights of all the other running VMs.",
338 minimum => 1,
339 maximum => 262144,
340 default => 'cgroup v1: 1024, cgroup v2: 100',
341 },
342 memory => {
343 optional => 1,
344 type => 'integer',
345 description => "Amount of RAM for the VM in MiB. This is the maximum available memory when"
346 ." you use the balloon device.",
347 minimum => 16,
348 default => 512,
349 },
# Target amount of RAM for the balloon device; zero switches the balloon driver off.
balloon => {
    optional => 1,
    type => 'integer',
    description => "Amount of target RAM for the VM in MiB. Using zero disables the balloon driver.",
    minimum => 0,
},
356 shares => {
357 optional => 1,
358 type => 'integer',
359 description => "Amount of memory shares for auto-ballooning. The larger the number is, the"
360 ." more memory this VM gets. Number is relative to weights of all other running VMs."
361 ." Using zero disables auto-ballooning. Auto-ballooning is done by pvestatd.",
362 minimum => 0,
363 maximum => 50000,
364 default => 1000,
365 },
366 keyboard => {
367 optional => 1,
368 type => 'string',
369 description => "Keyboard layout for VNC server. This option is generally not required and"
370 ." is often better handled from within the guest OS.",
371 enum => PVE::Tools::kvmkeymaplist(),
372 default => undef,
373 },
374 name => {
375 optional => 1,
376 type => 'string', format => 'dns-name',
377 description => "Set a name for the VM. Only used on the configuration web interface.",
378 },
379 scsihw => {
380 optional => 1,
381 type => 'string',
382 description => "SCSI controller model",
383 enum => [qw(lsi lsi53c810 virtio-scsi-pci virtio-scsi-single megasas pvscsi)],
384 default => 'lsi',
385 },
386 description => {
387 optional => 1,
388 type => 'string',
389 description => "Description for the VM. Shown in the web-interface VM's summary."
390 ." This is saved as comment inside the configuration file.",
391 maxLength => 1024 * 8,
392 },
# Guest operating system type. The long description is a plain heredoc, so its body
# and the EODESC terminator must stay unindented.
ostype => {
    optional => 1,
    type => 'string',
    enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 win10 win11 l24 l26 solaris)],
    description => "Specify guest operating system.",
    verbose_description => <<EODESC,
Specify guest operating system. This is used to enable special
optimization/features for specific operating systems:

[horizontal]
other;; unspecified OS
wxp;; Microsoft Windows XP
w2k;; Microsoft Windows 2000
w2k3;; Microsoft Windows 2003
w2k8;; Microsoft Windows 2008
wvista;; Microsoft Windows Vista
win7;; Microsoft Windows 7
win8;; Microsoft Windows 8/2012/2012r2
win10;; Microsoft Windows 10/2016/2019
win11;; Microsoft Windows 11/2022
l24;; Linux 2.4 Kernel
l26;; Linux 2.6 - 5.X Kernel
solaris;; Solaris/OpenSolaris/OpenIndiana kernel
EODESC
},
418 boot => {
419 optional => 1,
420 type => 'string', format => 'pve-qm-boot',
421 description => "Specify guest boot order. Use the 'order=' sub-property as usage with no"
422 ." key or 'legacy=' is deprecated.",
423 },
424 bootdisk => {
425 optional => 1,
426 type => 'string', format => 'pve-qm-bootdisk',
427 description => "Enable booting from specified disk. Deprecated: Use 'boot: order=foo;bar' instead.",
428 pattern => '(ide|sata|scsi|virtio)\d+',
429 },
430 smp => {
431 optional => 1,
432 type => 'integer',
433 description => "The number of CPUs. Please use option -sockets instead.",
434 minimum => 1,
435 default => 1,
436 },
437 sockets => {
438 optional => 1,
439 type => 'integer',
440 description => "The number of CPU sockets.",
441 minimum => 1,
442 default => 1,
443 },
444 cores => {
445 optional => 1,
446 type => 'integer',
447 description => "The number of cores per socket.",
448 minimum => 1,
449 default => 1,
450 },
451 numa => {
452 optional => 1,
453 type => 'boolean',
454 description => "Enable/disable NUMA.",
455 default => 0,
456 },
457 hugepages => {
458 optional => 1,
459 type => 'string',
460 description => "Enable/disable hugepages memory.",
461 enum => [qw(any 2 1024)],
462 },
# Companion flag to 'hugepages': keep the allocated hugepages after the VM shuts down.
keephugepages => {
    optional => 1,
    type => 'boolean',
    default => 0,
    description => "Use together with hugepages. If enabled, hugepages will not be deleted"
        ." after VM shutdown and can be used for subsequent starts.",
},
470 vcpus => {
471 optional => 1,
472 type => 'integer',
473 description => "Number of hotplugged vcpus.",
474 minimum => 1,
475 default => 0,
476 },
477 acpi => {
478 optional => 1,
479 type => 'boolean',
480 description => "Enable/disable ACPI.",
481 default => 1,
482 },
483 agent => {
484 optional => 1,
485 description => "Enable/disable communication with the QEMU Guest Agent and its properties.",
486 type => 'string',
487 format => $agent_fmt,
488 },
489 kvm => {
490 optional => 1,
491 type => 'boolean',
492 description => "Enable/disable KVM hardware virtualization.",
493 default => 1,
494 },
495 tdf => {
496 optional => 1,
497 type => 'boolean',
498 description => "Enable/disable time drift fix.",
499 default => 0,
500 },
501 localtime => {
502 optional => 1,
503 type => 'boolean',
504 description => "Set the real time clock (RTC) to local time. This is enabled by default if"
505 ." the `ostype` indicates a Microsoft Windows OS.",
506 },
507 freeze => {
508 optional => 1,
509 type => 'boolean',
510 description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
511 },
512 vga => {
513 optional => 1,
514 type => 'string', format => $vga_fmt,
515 description => "Configure the VGA hardware.",
516 verbose_description => "Configure the VGA Hardware. If you want to use high resolution"
517 ." modes (>= 1280x1024x16) you may need to increase the vga memory option. Since QEMU"
518 ." 2.9 the default VGA display type is 'std' for all OS types besides some Windows"
519 ." versions (XP and older) which use 'cirrus'. The 'qxl' option enables the SPICE"
520 ." display server. For win* OS you can select how many independent displays you want,"
521 ." Linux guests can add displays them self.\nYou can also run without any graphic card,"
522 ." using a serial device as terminal.",
523 },
524 watchdog => {
525 optional => 1,
526 type => 'string', format => 'pve-qm-watchdog',
527 description => "Create a virtual hardware watchdog device.",
528 verbose_description => "Create a virtual hardware watchdog device. Once enabled (by a guest"
529 ." action), the watchdog must be periodically polled by an agent inside the guest or"
530 ." else the watchdog will reset the guest (or execute the respective action specified)",
531 },
532 startdate => {
533 optional => 1,
534 type => 'string',
535 typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
536 description => "Set the initial date of the real time clock. Valid format for date are:"
537 ."'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
538 pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
539 default => 'now',
540 },
541 startup => get_standard_option('pve-startup-order'),
542 template => {
543 optional => 1,
544 type => 'boolean',
545 description => "Enable/disable Template.",
546 default => 0,
547 },
548 args => {
549 optional => 1,
550 type => 'string',
551 description => "Arbitrary arguments passed to kvm.",
552 verbose_description => <<EODESCR,
553 Arbitrary arguments passed to kvm, for example:
554
555 args: -no-reboot -no-hpet
556
557 NOTE: this option is for experts only.
558 EODESCR
559 },
560 tablet => {
561 optional => 1,
562 type => 'boolean',
563 default => 1,
564 description => "Enable/disable the USB tablet device.",
565 verbose_description => "Enable/disable the USB tablet device. This device is usually needed"
566 ." to allow absolute mouse positioning with VNC. Else the mouse runs out of sync with"
567 ." normal VNC clients. If you're running lots of console-only guests on one host, you"
568 ." may consider disabling this to save some context switches. This is turned off by"
569 ." default if you use spice (`qm set <vmid> --vga qxl`).",
570 },
571 migrate_speed => {
572 optional => 1,
573 type => 'integer',
574 description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
575 minimum => 0,
576 default => 0,
577 },
578 migrate_downtime => {
579 optional => 1,
580 type => 'number',
581 description => "Set maximum tolerated downtime (in seconds) for migrations.",
582 minimum => 0,
583 default => 0.1,
584 },
585 cdrom => {
586 optional => 1,
587 type => 'string', format => 'pve-qm-ide',
588 typetext => '<volume>',
589 description => "This is an alias for option -ide2",
590 },
591 cpu => {
592 optional => 1,
593 description => "Emulated CPU type.",
594 type => 'string',
595 format => 'pve-vm-cpu-conf',
596 },
597 parent => get_standard_option('pve-snapshot-name', {
598 optional => 1,
599 description => "Parent snapshot name. This is used internally, and should not be modified.",
600 }),
601 snaptime => {
602 optional => 1,
603 description => "Timestamp for snapshots.",
604 type => 'integer',
605 minimum => 0,
606 },
607 vmstate => {
608 optional => 1,
609 type => 'string', format => 'pve-volume-id',
610 description => "Reference to a volume which stores the VM state. This is used internally"
611 ." for snapshots.",
612 },
613 vmstatestorage => get_standard_option('pve-storage-id', {
614 description => "Default storage for VM state volumes/files.",
615 optional => 1,
616 }),
617 runningmachine => get_standard_option('pve-qemu-machine', {
618 description => "Specifies the QEMU machine type of the running vm. This is used internally"
619 ." for snapshots.",
620 }),
621 runningcpu => {
622 description => "Specifies the QEMU '-cpu' parameter of the running vm. This is used"
623 ." internally for snapshots.",
624 optional => 1,
625 type => 'string',
626 pattern => $PVE::QemuServer::CPUConfig::qemu_cmdline_cpu_re,
627 format_description => 'QEMU -cpu parameter'
628 },
629 machine => get_standard_option('pve-qemu-machine'),
630 arch => {
631 description => "Virtual processor architecture. Defaults to the host.",
632 optional => 1,
633 type => 'string',
634 enum => [qw(x86_64 aarch64)],
635 },
636 smbios1 => {
637 description => "Specify SMBIOS type 1 fields.",
638 type => 'string', format => 'pve-qm-smbios1',
639 maxLength => 512,
640 optional => 1,
641 },
642 protection => {
643 optional => 1,
644 type => 'boolean',
645 description => "Sets the protection flag of the VM. This will disable the remove VM and"
646 ." remove disk operations.",
647 default => 0,
648 },
649 bios => {
650 optional => 1,
651 type => 'string',
652 enum => [ qw(seabios ovmf) ],
653 description => "Select BIOS implementation.",
654 default => 'seabios',
655 },
656 vmgenid => {
657 type => 'string',
658 pattern => '(?:[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}|[01])',
659 format_description => 'UUID',
660 description => "Set VM Generation ID. Use '1' to autogenerate on create or update, pass '0'"
661 ." to disable explicitly.",
662 verbose_description => "The VM generation ID (vmgenid) device exposes a 128-bit integer"
663 ." value identifier to the guest OS. This allows to notify the guest operating system"
664 ." when the virtual machine is executed with a different configuration (e.g. snapshot"
665 ." execution or creation from a template). The guest operating system notices the"
666 ." change, and is then able to react as appropriate by marking its copies of"
667 ." distributed databases as dirty, re-initializing its random number generator, etc.\n"
668 ."Note that auto-creation only works when done through API/CLI create or update methods"
669 .", but not when manually editing the config file.",
670 default => "1 (autogenerated)",
671 optional => 1,
672 },
673 hookscript => {
674 type => 'string',
675 format => 'pve-volume-id',
676 optional => 1,
677 description => "Script that will be executed during various steps in the vms lifetime.",
678 },
679 ivshmem => {
680 type => 'string',
681 format => $ivshmem_fmt,
682 description => "Inter-VM shared memory. Useful for direct communication between VMs, or to"
683 ." the host.",
684 optional => 1,
685 },
686 audio0 => {
687 type => 'string',
688 format => $audio_fmt,
689 description => "Configure a audio device, useful in combination with QXL/Spice.",
690 optional => 1
691 },
692 spice_enhancements => {
693 type => 'string',
694 format => $spice_enhancements_fmt,
695 description => "Configure additional enhancements for SPICE.",
696 optional => 1
697 },
698 tags => {
699 type => 'string', format => 'pve-tag-list',
700 description => 'Tags of the VM. This is only meta information.',
701 optional => 1,
702 },
703 rng0 => {
704 type => 'string',
705 format => $rng_fmt,
706 description => "Configure a VirtIO-based Random Number Generator.",
707 optional => 1,
708 },
709 meta => {
710 type => 'string',
711 format => $meta_info_fmt,
712 description => "Some (read-only) meta-information about this guest.",
713 optional => 1,
714 },
715 affinity => {
716 type => 'string', format => 'pve-cpuset',
717 description => "List of host cores used to execute guest processes, for example: 0,5,8-11",
718 optional => 1,
719 },
720 };
721
# Property-string schema for 'cicustom': one optional volume per cloud-init data kind
# (meta, network, user, vendor).
my $cicustom_fmt = {
    meta => {
        type => 'string',
        optional => 1,
        # Both parts are single-quoted on purpose: mixing in double quotes here would
        # embed the quote, the line break and the concatenation dot in the text itself.
        description => 'Specify a custom file containing all meta data passed to the VM via'
            .' cloud-init. This is provider specific meaning configdrive2 and nocloud differ.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    network => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all network data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    user => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all user data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    vendor => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all vendor data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
};
753 PVE::JSONSchema::register_format('pve-qm-cicustom', $cicustom_fmt);
754
# Cloud-init related VM options. Further down in this file the per-interface ipconfigN
# entries are added and the whole set is merged into $confdesc.
my $confdesc_cloudinit = {
    citype => {
        optional => 1,
        type => 'string',
        description => 'Specifies the cloud-init configuration format. The default depends on the'
            .' configured operating system type (`ostype`). We use the `nocloud` format for Linux,'
            .' and `configdrive2` for windows.',
        enum => ['configdrive2', 'nocloud', 'opennebula'],
    },
    ciuser => {
        optional => 1,
        type => 'string',
        description => "cloud-init: User name to change ssh keys and password for instead of the"
            ." image's configured default user.",
    },
    cipassword => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Password to assign the user. Using this is generally not'
            .' recommended. Use ssh keys instead. Also note that older cloud-init versions do not'
            .' support hashed passwords.',
    },
    cicustom => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Specify custom files to replace the automatically generated'
            .' ones at start.',
        format => 'pve-qm-cicustom',
    },
    searchdomain => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Sets DNS search domains for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    nameserver => {
        optional => 1,
        type => 'string', format => 'address-list',
        description => 'cloud-init: Sets DNS server IP address for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    sshkeys => {
        optional => 1,
        type => 'string',
        format => 'urlencoded',
        description => "cloud-init: Setup public SSH keys (one key per line, OpenSSH format).",
    },
};
805
806 # what about other qemu settings ?
807 #cpu => 'string',
808 #machine => 'string',
809 #fda => 'file',
810 #fdb => 'file',
811 #mtdblock => 'file',
812 #sd => 'file',
813 #pflash => 'file',
814 #snapshot => 'bool',
815 #bootp => 'file',
816 ##tftp => 'dir',
817 ##smb => 'dir',
818 #kernel => 'file',
819 #append => 'string',
820 #initrd => 'file',
821 ##soundhw => 'string',
822
# Publish every option currently in $confdesc as a standard option named "pve-qm-<key>".
for my $opt_name (keys %$confdesc) {
    PVE::JSONSchema::register_standard_option("pve-qm-$opt_name", $confdesc->{$opt_name});
}
826
827 my $MAX_USB_DEVICES = 14;
828 my $MAX_NETS = 32;
829 my $MAX_SERIAL_PORTS = 4;
830 my $MAX_PARALLEL_PORTS = 3;
831 my $MAX_NUMA = 8;
832
# Property-string schema for one NUMA node: guest CPUs (mandatory), memory, host nodes
# and the allocation policy.
my $numa_fmt = {
    cpus => {
        type => "string",
        format_description => "id[-id];...",
        description => "CPUs accessing this NUMA node.",
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
    },
    memory => {
        type => "number",
        description => "Amount of memory this NUMA node provides.",
        optional => 1,
    },
    hostnodes => {
        type => "string",
        format_description => "id[-id];...",
        description => "Host NUMA nodes to use.",
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        optional => 1,
    },
    policy => {
        type => 'string',
        description => "NUMA allocation policy.",
        enum => [ 'preferred', 'bind', 'interleave' ],
        optional => 1,
    },
};
859 PVE::JSONSchema::register_format('pve-qm-numanode', $numa_fmt);
# Option description shared by all numaN options; also published as a standard option.
my $numadesc = {
    type => 'string',
    format => $numa_fmt,
    description => "NUMA topology.",
    optional => 1,
};
PVE::JSONSchema::register_standard_option("pve-qm-numanode", $numadesc);

# numa0 .. numa($MAX_NUMA - 1) all point at the same description.
$confdesc->{"numa$_"} = $numadesc for 0 .. $MAX_NUMA - 1;
870
# Network card models accepted for the 'model' property of a network device.
my $nic_model_list = [
    qw(
        e1000
        e1000-82540em
        e1000-82544gc
        e1000-82545em
        e1000e
        i82551
        i82557b
        i82559er
        ne2k_isa
        ne2k_pci
        pcnet
        rtl8139
        virtio
        vmxnet3
    )
];

# The same models, sorted and joined by single spaces.
my $nic_model_list_txt = join(' ', sort { $a cmp $b } @$nic_model_list);
888
889 my $net_fmt_bridge_descr = <<__EOD__;
890 Bridge to attach the network device to. The Proxmox VE standard bridge
891 is called 'vmbr0'.
892
893 If you do not specify a bridge, we create a kvm user (NATed) network
894 device, which provides DHCP and DNS services. The following addresses
895 are used:
896
897 10.0.2.2 Gateway
898 10.0.2.3 DNS Server
899 10.0.2.4 SMB Server
900
901 The DHCP server assign addresses to the guest starting from 10.0.2.15.
902 __EOD__
903
# Property-string schema for a network device. 'model' is the default key; in addition
# every model name is accepted as a key that aliases to model/macaddr.
my $net_fmt = {
    macaddr => get_standard_option('mac-addr', {
        description => "MAC address. That address must be unique within your network. This is"
            ." automatically generated if not specified.",
    }),
    model => {
        type => 'string',
        description => "Network Card Model. The 'virtio' model provides the best performance with"
            ." very low CPU overhead. If your guest does not support this driver, it is usually"
            ." best to use 'e1000'.",
        enum => $nic_model_list,
        default_key => 1,
    },
    # one alias entry per known model name
    (map { $_ => { keyAlias => 'model', alias => 'macaddr' }} @$nic_model_list),
    bridge => get_standard_option('pve-bridge-id', {
        description => $net_fmt_bridge_descr,
        optional => 1,
    }),
    queues => {
        type => 'integer',
        minimum => 0, maximum => 64,
        description => 'Number of packet queues to be used on the device.',
        optional => 1,
    },
    rate => {
        type => 'number',
        minimum => 0,
        description => "Rate limit in mbps (megabytes per second) as floating point number.",
        optional => 1,
    },
    tag => {
        type => 'integer',
        minimum => 1, maximum => 4094,
        description => 'VLAN tag to apply to packets on this interface.',
        optional => 1,
    },
    trunks => {
        type => 'string',
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        description => 'VLAN trunks to pass through this interface.',
        format_description => 'vlanid[;vlanid...]',
        optional => 1,
    },
    firewall => {
        type => 'boolean',
        description => 'Whether this interface should be protected by the firewall.',
        optional => 1,
    },
    link_down => {
        type => 'boolean',
        description => 'Whether this interface should be disconnected (like pulling the plug).',
        optional => 1,
    },
    mtu => {
        type => 'integer',
        minimum => 1, maximum => 65520,
        description => "Force MTU, for VirtIO only. Set to '1' to use the bridge MTU",
        optional => 1,
    },
};
964
# Option description for a network device; also published as a standard option.
my $netdesc = {
    type => 'string',
    format => $net_fmt,
    description => "Specify network devices.",
    optional => 1,
};

PVE::JSONSchema::register_standard_option('pve-qm-net', $netdesc);
972
# Property-string schema for the cloud-init IP configuration of one interface.
# Each gateway key requires the address key of the same IP family.
my $ipconfig_fmt = {
    ip => {
        type => 'string',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        description => 'IPv4 address in CIDR format.',
        default => 'dhcp',
        optional => 1,
    },
    gw => {
        type => 'string',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        description => 'Default gateway for IPv4 traffic.',
        requires => 'ip',
        optional => 1,
    },
    ip6 => {
        type => 'string',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        description => 'IPv6 address in CIDR format.',
        default => 'dhcp',
        optional => 1,
    },
    gw6 => {
        type => 'string',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        description => 'Default gateway for IPv6 traffic.',
        requires => 'ip6',
        optional => 1,
    },
};
1007 PVE::JSONSchema::register_format('pve-qm-ipconfig', $ipconfig_fmt);
# Schema entry shared by all cloud-init 'ipconfig<N>' options: an optional
# property string validated against the registered 'pve-qm-ipconfig' format.
my $ipconfigdesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-ipconfig',
    description => <<'EODESCR',
cloud-init: Specify IP addresses and gateways for the corresponding interface.

IP addresses use CIDR notation, gateways are optional but need an IP of the same type specified.

The special string 'dhcp' can be used for IP addresses to use DHCP, in which case no explicit
gateway should be provided.
For IPv6 the special string 'auto' can be used to use stateless autoconfiguration. This requires
cloud-init 19.4 or newer.

If cloud-init is enabled and neither an IPv4 nor an IPv6 address is specified, it defaults to using
dhcp on IPv4.
EODESCR
};
# The standard option must describe the ipconfig schema; it was previously
# registered with the network device schema ($netdesc) by mistake.
PVE::JSONSchema::register_standard_option("pve-qm-ipconfig", $ipconfigdesc);
1026
# Register one 'net<N>' option and one cloud-init 'ipconfig<N>' option per
# possible network device index.
for my $idx (0 .. $MAX_NETS - 1) {
    $confdesc->{"net$idx"} = $netdesc;
    $confdesc_cloudinit->{"ipconfig$idx"} = $ipconfigdesc;
}

# Every cloud-init option is also a regular VM config option.
for my $name (keys %$confdesc_cloudinit) {
    $confdesc->{$name} = $confdesc_cloudinit->{$name};
}
1035
PVE::JSONSchema::register_format('pve-cpuset', \&pve_verify_cpuset);

# Format verifier for 'pve-cpuset'.
#
# Parses $set_text with PVE::CpuSet::parse_cpuset and returns the short string
# form of the resulting set. On a parse failure it returns nothing when $noerr
# is set and dies otherwise.
sub pve_verify_cpuset {
    my ($set_text, $noerr) = @_;

    # only the member list is needed, the count returned first is ignored
    my (undef, $members) = eval { PVE::CpuSet::parse_cpuset($set_text) };

    if (my $err = $@) {
        die "unable to parse cpuset option\n" if !$noerr;
        return;
    }

    my $cpuset = PVE::CpuSet->new($members);
    return $cpuset->short_string();
}
1049
PVE::JSONSchema::register_format('pve-volume-id-or-qm-path', \&verify_volume_id_or_qm_path);

# Format verifier for 'pve-volume-id-or-qm-path'.
#
# Accepts the keywords 'none' and 'cdrom' unchanged; everything else is
# delegated to verify_volume_id_or_absolute_path().
sub verify_volume_id_or_qm_path {
    my ($volid, $noerr) = @_;

    for my $keyword ('none', 'cdrom') {
        return $volid if $volid eq $keyword;
    }

    return verify_volume_id_or_absolute_path($volid, $noerr);
}
1058
PVE::JSONSchema::register_format('pve-volume-id-or-absolute-path', \&verify_volume_id_or_absolute_path);

# Format verifier for 'pve-volume-id-or-absolute-path'.
#
# A value starting with '/' is returned unchanged. Anything else is checked
# against the 'pve-volume-id' format; on failure the sub returns nothing when
# $noerr is set, otherwise the original error is re-thrown.
sub verify_volume_id_or_absolute_path {
    my ($volid, $noerr) = @_;

    return $volid if $volid =~ m|^/|;

    my $checked = eval { PVE::JSONSchema::check_format('pve-volume-id', $volid, '') };
    if (my $err = $@) {
        return if $noerr;
        die $err;
    }

    return $checked;
}
1072
# Property-string format for the 'usb<N>' options.
my $usb_fmt = {
    # default key: the host device/port specification or the keyword 'spice'
    host => {
        default_key => 1,
        type => 'string', format => 'pve-qm-usb-device',
        format_description => 'HOSTUSBDEVICE|spice',
        description => <<EODESCR,
The Host USB device or port or the value 'spice'. HOSTUSBDEVICE syntax is:

'bus-port(.port)*' (decimal numbers) or
'vendor_id:product_id' (hexadecimal numbers) or
'spice'

You can use the 'lsusb -t' command to list existing usb devices.

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

The value 'spice' can be used to add a usb redirection device for spice.
EODESCR
    },
    usb3 => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether the given host option is a USB3 device or port."
            ." For modern guests (machine version >= 7.1 and ostype l26 and windows > 7), this flag"
            ." is irrelevant (all devices are plugged into a xhci controller).",
        default => 0,
    },
};

# Schema entry shared by all 'usb<N>' options.
my $usbdesc = {
    optional => 1,
    type => 'string', format => $usb_fmt,
    description => "Configure an USB device (n is 0 to 4, for machine version >= 7.1 and ostype"
        ." l26 or windows > 7, n can be up to 14).",
};
# Make the same schema available as the standard option 'pve-qm-usb'.
PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc);
1110
# Schema entry shared by all 'serial<N>' options. The value is either a path
# below /dev/ or the keyword 'socket' (see 'pattern').
my $serialdesc = {
    optional => 1,
    type => 'string',
    pattern => '(/dev/.+|socket)',
    description => "Create a serial device inside the VM (n is 0 to 3)",
    verbose_description => <<EODESCR,
Create a serial device inside the VM (n is 0 to 3), and pass through a
host serial device (i.e. /dev/ttyS0), or create a unix socket on the
host side (use 'qm terminal' to open a terminal connection).

NOTE: If you pass through a host serial device, it is no longer possible to migrate such machines -
use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1127
# Schema entry shared by all 'parallel<N>' options. The value must name a host
# parallel port device, either /dev/parport<N> or /dev/usb/lp<N> (see 'pattern').
my $paralleldesc= {
    optional => 1,
    type => 'string',
    pattern => '/dev/parport\d+|/dev/usb/lp\d+',
    description => "Map host parallel devices (n is 0 to 2).",
    verbose_description => <<EODESCR,
Map host parallel devices (n is 0 to 2).

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1142
# Register the indexed pass-through/device options in $confdesc.

# parallel0 .. parallel(N-1)
for my $idx (0 .. $MAX_PARALLEL_PORTS - 1) {
    $confdesc->{"parallel$idx"} = $paralleldesc;
}

# serial0 .. serial(N-1)
for my $idx (0 .. $MAX_SERIAL_PORTS - 1) {
    $confdesc->{"serial$idx"} = $serialdesc;
}

# hostpci0 .. hostpci(N-1), schema provided by the PCI module
for my $idx (0 .. $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES - 1) {
    $confdesc->{"hostpci$idx"} = $PVE::QemuServer::PCI::hostpcidesc;
}

# all drive options, schema provided by the Drive module
$confdesc->{$_} = $PVE::QemuServer::Drive::drivedesc_hash->{$_}
    for keys %{$PVE::QemuServer::Drive::drivedesc_hash};

# usb0 .. usb(N-1)
for my $idx (0 .. $MAX_USB_DEVICES - 1) {
    $confdesc->{"usb$idx"} = $usbdesc;
}
1162
# Property-string format for the 'boot' option.
# 'legacy' is the default key (a bare value such as 'cdn' is parsed as legacy=cdn);
# 'order' is a list validated with the 'pve-qm-bootdev-list' format.
my $boot_fmt = {
    legacy => {
        optional => 1,
        default_key => 1,
        type => 'string',
        description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n)."
            . " Deprecated, use 'order=' instead.",
        pattern => '[acdn]{1,4}',
        format_description => "[acdn]{1,4}",

        # note: this is also the fallback if boot: is not given at all
        default => 'cdn',
    },
    order => {
        optional => 1,
        type => 'string',
        format => 'pve-qm-bootdev-list',
        format_description => "device[;device...]",
        description => <<EODESC,
The guest will attempt to boot from devices in the order they appear here.

Disks, optical drives and passed-through storage USB devices will be directly
booted from, NICs will load PXE, and PCIe devices will either behave like disks
(e.g. NVMe) or load an option ROM (e.g. RAID controller, hardware NIC).

Note that only devices in this list will be marked as bootable and thus loaded
by the guest firmware (BIOS/UEFI). If you require multiple disks for booting
(e.g. software-raid), you need to specify all of them here.

Overrides the deprecated 'legacy=[acdn]*' value when given.
EODESC
    },
};
# Register the format under the name 'pve-qm-boot'.
PVE::JSONSchema::register_format('pve-qm-boot', $boot_fmt);
1197
PVE::JSONSchema::register_format('pve-qm-bootdev', \&verify_bootdev);

# Format verifier for a single boot device name.
#
# Accepted are valid drive names (except efidisk*/tpmstate*) and any
# net<N>/usb<N>/hostpci<N> key that exists in $confdesc. For anything else the
# sub returns nothing when $noerr is set and dies otherwise.
sub verify_bootdev {
    my ($dev, $noerr) = @_;

    # efidisk and tpmstate are drive names, but never accepted as boot devices
    my $special = $dev =~ m/^efidisk/ || $dev =~ m/^tpmstate/;
    if (PVE::QemuServer::Drive::is_valid_drivename($dev) && !$special) {
        return $dev;
    }

    for my $base ('net', 'usb', 'hostpci') {
        next if $dev !~ m/^$base\d+$/;
        next if !$confdesc->{$dev};
        return $dev;
    }

    die "invalid boot device '$dev'\n" if !$noerr;
    return;
}
1219
# Render a list of boot devices (array reference) as a 'boot' property string
# of the form 'order=dev1;dev2;...'. An empty list yields the empty string.
sub print_bootorder {
    my ($devs) = @_;

    return "" if scalar(@$devs) == 0;

    my $order = join(';', @$devs);
    return PVE::JSONSchema::print_property_string({ order => $order }, $boot_fmt);
}
1226
# Cached result of the KVM_GET_API_VERSION ioctl (0 means: not queried yet).
my $kvm_api_version = 0;

# Return the KVM API version reported by /dev/kvm.
# The value is cached once a true value was obtained. Returns nothing if
# /dev/kvm cannot be opened.
sub kvm_version {
    if (!$kvm_api_version) {
        open(my $kvm_fh, '<', '/dev/kvm') or return;

        # 0xae00 => KVM_GET_API_VERSION
        $kvm_api_version = ioctl($kvm_fh, 0xae00, 0);
        close($kvm_fh);
    }

    return $kvm_api_version;
}
1240
# Per-binary caches: detected QEMU version string and the binary's mtime at
# the time the version was detected.
my $kvm_user_version = {};
my $kvm_mtime = {};

# Return the version of the given QEMU binary as reported by '<binary> --version'.
#
# $binary defaults to the command for the host architecture. The result is
# cached per binary and re-detected whenever the binary's mtime changes.
# Returns 'unknown' if the version line could not be parsed (a failing command
# only produces a warning). Dies if the binary cannot be stat'ed.
sub kvm_user_version {
    my ($binary) = @_;

    $binary //= get_command_for_arch(get_host_arch()); # get the native arch by default

    # File::stat's stat() returns undef on failure; fail with a clear message
    # instead of an "undefined value" method call error further below
    my $st = stat($binary)
        or die "unable to stat QEMU binary '$binary' - $!\n";
    my $mtime = $st->mtime;

    my $cachedmtime = $kvm_mtime->{$binary} // -1;
    return $kvm_user_version->{$binary}
        if $kvm_user_version->{$binary} && $cachedmtime == $mtime;

    $kvm_user_version->{$binary} = 'unknown';
    $kvm_mtime->{$binary} = $mtime;

    my $code = sub {
        my $line = shift;
        if ($line =~ m/^QEMU( PC)? emulator version (\d+\.\d+(\.\d+)?)(\.\d+)?[,\s]/) {
            $kvm_user_version->{$binary} = $2;
        }
    };

    eval { run_command([$binary, '--version'], outfunc => $code); };
    warn $@ if $@;

    return $kvm_user_version->{$binary};
}
# Thin wrapper around PVE::QemuServer::Machine::extract_version() that falls
# back to the installed QEMU version when no $version is passed.
my sub extract_version {
    my ($machine_type, $version) = @_;

    $version //= kvm_user_version();

    return PVE::QemuServer::Machine::extract_version($machine_type, $version);
}
1275
# True if /dev/vhost-net exists and is a character device.
sub kernel_has_vhost_net {
    my $vhost_dev = '/dev/vhost-net';
    return -c $vhost_dev;
}
1279
# True if $key is a known VM config option, i.e. has a defined entry in $confdesc.
sub option_exists {
    my ($key) = @_;

    return defined($confdesc->{$key});
}
1284
# Cached result of get_cdrom_path() ('' means: no device found).
my $cdrom_path;

# Return the first of /dev/cdrom, /dev/cdrom1, /dev/cdrom2 that is a symlink.
# If none is, a warning is logged and the empty string is returned. The result
# is computed once and cached.
sub get_cdrom_path {
    if (!defined($cdrom_path)) {
        my @candidates = map { "/dev/cdrom$_" } ('', '1', '2');
        $cdrom_path = first { -l $_ } @candidates;

        if (!defined($cdrom_path)) {
            log_warn("no physical CD-ROM available, ignoring");
            $cdrom_path = '';
        }
    }

    return $cdrom_path;
}
1299
# Resolve the 'file' value of a CD-ROM drive to a path:
#   'cdrom'        -> the physical drive (see get_cdrom_path)
#   'none'         -> empty string
#   absolute path  -> returned unchanged
#   anything else  -> resolved via PVE::Storage::path()
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';
    return '' if $cdrom eq 'none';
    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path($storecfg, $cdrom);
}
1313
# try to convert old style file names to volume IDs
#
# Values that are 'none', 'cdrom', a /dev/ path or already look like
# 'storage:volume' are returned unchanged. A bare file name is mapped to
# 'local:iso/<file>' for cdrom media and to 'local:<vmid>/<file>' otherwise.
# Any other value containing a '/' cannot be converted; nothing is returned.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    my $keep_as_is = $file eq 'none'
        || $file eq 'cdrom'
        || $file =~ m|^/dev/.+|
        || $file =~ m/^([^:]+):(.+)$/;
    return $file if $keep_as_is;

    return if $file =~ m|/|;

    my $is_cdrom = $media && $media eq 'cdrom';
    return $is_cdrom ? "local:iso/$file" : "local:$vmid/$file";
}
1332
# Check that the storage content type $vtype matches the drive's $media type
# ('disk' expects 'images', 'cdrom' expects 'iso'). Nothing is checked when
# $media is not set. A mismatch raises a parameter exception for $opt; an
# unknown media type is an internal error.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my %expected_for = (
        disk => 'images',
        cdrom => 'iso',
    );
    die "internal error" if !exists($expected_for{$media});
    my $etype = $expected_for{$media};

    return if $vtype eq $etype;

    raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
}
1351
# Normalize $drive->{file} in place.
#
# A value that is not 'cdrom'/'none', not a /dev/ path, not already of the form
# 'storage:volume' and not purely numeric is treated as a filesystem path and
# converted to a volume ID via PVE::Storage::path_to_volume_id(); a parameter
# exception for $opt is raised if no storage matches. The media type is set to
# 'cdrom' when unset and the volume is an ISO or the file is 'cdrom'/'none'.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    # try to convert filesystem paths to volume IDs
    my $file = $drive->{file};
    my $is_fs_path = ($file !~ m/^(cdrom|none)$/)
        && ($file !~ m|^/dev/.+|)
        && ($file !~ m/^([^:]+):(.+)$/)
        && ($file !~ m/^\d+$/);

    if ($is_fs_path) {
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $file);
        if (!$vtype) {
            raise_param_exc({ $opt => "unable to associate path '$drive->{file}' to any storage"});
        }
        if (!$drive->{media} && $vtype eq 'iso') {
            $drive->{media} = 'cdrom';
        }
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    $drive->{media} = 'cdrom' if !$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/;
}
1371
# Parse the 'hotplug' option into a hash reference { feature => 1, ... }.
#
# '0' yields an empty hash, '1' stands for the schema default of the hotplug
# option. Otherwise the value is a list of feature names; an unknown name
# makes the sub die.
sub parse_hotplug_features {
    my ($data) = @_;

    return {} if $data eq '0';

    if ($data eq '1') {
        $data = $confdesc->{hotplug}->{default};
    }

    my $enabled = {};
    for my $feature (PVE::Tools::split_list($data)) {
        my ($name) = $feature =~ m/^(network|disk|cpu|memory|usb|cloudinit)$/;
        die "invalid hotplug feature '$feature'\n" if !defined($name);
        $enabled->{$name} = 1;
    }

    return $enabled;
}
1390
PVE::JSONSchema::register_format('pve-hotplug-features', \&pve_verify_hotplug_features);

# Format verifier for 'pve-hotplug-features'.
#
# Returns $value if parse_hotplug_features() accepts it. Otherwise returns
# nothing when $noerr is set, or dies with the parser's error message.
sub pve_verify_hotplug_features {
    my ($value, $noerr) = @_;

    # parse_hotplug_features() signals invalid input by dying, so it has to be
    # wrapped in eval for $noerr to have any effect
    my $parsed = eval { parse_hotplug_features($value) };
    my $err = $@;

    return $value if $parsed;

    return if $noerr;

    die $err if $err;
    die "unable to parse hotplug option\n";
}
1401
# Send a SCSI INQUIRY to the device behind the open file handle $fh, using the
# Linux SCSI generic (sg) ioctl interface.
#
# Returns a hash reference with the keys type, removable, vendor, product and
# revision decoded from the 36 byte response. On any failure the sub dies, or
# returns nothing if $noerr is set.
sub scsi_inquiry {
    my($fh, $noerr) = @_;

    my $SG_IO = 0x2285;
    my $SG_GET_VERSION_NUM = 0x2282;

    my $versionbuf = "\x00" x 8;
    my $ret = ioctl($fh, $SG_GET_VERSION_NUM, $versionbuf);
    if (!$ret) {
        die "scsi ioctl SG_GET_VERSION_NUM failed - $!\n" if !$noerr;
        return;
    }
    my $version = unpack("I", $versionbuf);
    if ($version < 30000) {
        die "scsi generic interface too old\n" if !$noerr;
        return;
    }

    my $buf = "\x00" x 36; # receives the response data
    my $sensebuf = "\x00" x 8;
    # command block: opcode 0x12 in byte 0 and the buffer length (36) in byte 4
    my $cmd = pack("C x3 C x1", 0x12, 36);

    # see /usr/include/scsi/sg.h
    my $sg_io_hdr_t = "i i C C s I P P P I I i P C C C C S S i I I";

    # the 'P' fields store pointers to $buf, $cmd and $sensebuf, so those
    # scalars must stay alive and unmodified until the ioctl has returned
    my $packet = pack(
        $sg_io_hdr_t, ord('S'), -3, length($cmd), length($sensebuf), 0, length($buf), $buf, $cmd, $sensebuf, 6000
    );

    $ret = ioctl($fh, $SG_IO, $packet);
    if (!$ret) {
        die "scsi ioctl SG_IO failed - $!\n" if !$noerr;
        return;
    }

    # fields 17 and 18 of the header are the two 'S' (unsigned short) status
    # fields; presumably host_status and driver_status - verify against sg.h
    my @res = unpack($sg_io_hdr_t, $packet);
    if ($res[17] || $res[18]) {
        die "scsi ioctl SG_IO status error - $!\n" if !$noerr;
        return;
    }

    my $res = {};
    $res->@{qw(type removable vendor product revision)} = unpack("C C x6 A8 A16 A4", $buf);

    # reduce the raw bytes: bit 7 of byte 1 is the removable flag, the low
    # five bits of byte 0 are the device type
    $res->{removable} = $res->{removable} & 128 ? 1 : 0;
    $res->{type} &= 0x1F;

    return $res;
}
1451
# Open $path read/write and run a SCSI inquiry on it.
#
# Returns the result of scsi_inquiry() (a hash reference) if the device
# answers, and nothing if the path cannot be opened or the inquiry fails.
sub path_is_scsi {
    my ($path) = @_;

    # pass the mode separately so that no character of $path is interpreted
    # as part of the open mode or stripped as surrounding whitespace
    my $fh = IO::File->new($path, '+<') || return;
    my $res = scsi_inquiry($fh, 1);
    close($fh);

    return $res;
}
1461
# Build the QEMU device string for the USB tablet. The tablet is attached to
# the 'ehci' bus on q35 machines and on aarch64, and to 'uhci' otherwise.
sub print_tabletdevice_full {
    my ($conf, $arch) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # we use uhci for old VMs because tablet driver was buggy in older qemu
    my $usbbus = ($q35 || $arch eq 'aarch64') ? 'ehci' : 'uhci';

    return "usb-tablet,id=tablet,bus=$usbbus.0,port=1";
}
1477
# Build the QEMU device string for the USB keyboard. Only aarch64 guests get
# one; for every other architecture nothing is returned.
sub print_keyboarddevice_full {
    my ($conf, $arch) = @_;

    if ($arch eq 'aarch64') {
        return "usb-kbd,id=keyboard,bus=ehci.0,port=2";
    }

    return;
}
1485
# Drive identifier: interface name directly followed by the index, e.g. 'scsi0'.
my sub get_drive_id {
    my ($drive) = @_;

    return $drive->{interface} . $drive->{index};
}
1490
# Build the value of the QEMU '-device' option for a drive.
#
# The device type and bus address depend on $drive->{interface}:
#   virtio   -> virtio-blk-pci at the PCI address from print_pci_addr()
#   scsi     -> scsi-hd/cd/block/generic on the controller from scsihw_infos()
#   ide/sata -> ide-hd/cd on ide.<controller> resp. ahci<controller>
# Optional bootindex and serial properties are appended for all of them.
# Dies for the 'usb' interface (not implemented) and for unknown interfaces.
sub print_drivedevice_full {
    my ($storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type) = @_;

    my $device = '';

    my $drive_id = get_drive_id($drive);
    if ($drive->{interface} eq 'virtio') {
        my $pciaddr = print_pci_addr("$drive_id", $bridges, $arch, $machine_type);
        $device = "virtio-blk-pci,drive=drive-$drive_id,id=${drive_id}${pciaddr}";
        $device .= ",iothread=iothread-$drive_id" if $drive->{iothread};
    } elsif ($drive->{interface} eq 'scsi') {

        my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = 'hd';
        my $path = '';
        if (drive_is_cdrom($drive)) {
            $devicetype = 'cd';
        } else {
            if ($drive->{file} =~ m|^/|) {
                # absolute path: ask the device itself what it is
                $path = $drive->{file};
                if (my $info = path_is_scsi($path)) {
                    if ($info->{type} == 0 && $drive->{scsiblock}) {
                        $devicetype = 'block';
                    } elsif ($info->{type} == 1) { # tape
                        $devicetype = 'generic';
                    }
                }
            } else {
                $path = PVE::Storage::path($storecfg, $drive->{file});
            }

            # for compatibility only, we prefer scsi-hd (#2408, #2355, #2380)
            my $version = extract_version($machine_type, kvm_user_version());
            if ($path =~ m/^iscsi\:\/\// &&
                !min_version($version, 4, 1)) {
                $devicetype = 'generic';
            }
        }

        # lsi* (also the fallback when scsihw is unset) and pvscsi address
        # drives by scsi-id, all other controllers by lun
        if (!$conf->{scsihw} || $conf->{scsihw} =~ m/^lsi/ || $conf->{scsihw} eq 'pvscsi') {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,scsi-id=$unit";
        } else {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,channel=0,scsi-id=0"
                .",lun=$drive->{index}";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($drive->{ssd} && ($devicetype eq 'block' || $devicetype eq 'hd')) {
            $device .= ",rotation_rate=1";
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};

    } elsif ($drive->{interface} eq 'ide' || $drive->{interface} eq 'sata') {
        my $maxdev = ($drive->{interface} eq 'sata') ? $PVE::QemuServer::Drive::MAX_SATA_DISKS : 2;
        my $controller = int($drive->{index} / $maxdev);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = ($drive->{media} && $drive->{media} eq 'cdrom') ? "cd" : "hd";

        $device = "ide-$devicetype";
        if ($drive->{interface} eq 'ide') {
            $device .= ",bus=ide.$controller,unit=$unit";
        } else {
            $device .= ",bus=ahci$controller.$unit";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($devicetype eq 'hd') {
            if (my $model = $drive->{model}) {
                # the model is unescaped (URI escaping) before use
                $model = URI::Escape::uri_unescape($model);
                $device .= ",model=$model";
            }
            if ($drive->{ssd}) {
                $device .= ",rotation_rate=1";
            }
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};
    } elsif ($drive->{interface} eq 'usb') {
        die "implement me";
        # -device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0
    } else {
        die "unsupported interface type";
    }

    $device .= ",bootindex=$drive->{bootindex}" if $drive->{bootindex};

    if (my $serial = $drive->{serial}) {
        # the serial is unescaped (URI escaping) before use
        $serial = URI::Escape::uri_unescape($serial);
        $device .= ",serial=$serial";
    }

    return $device;
}
1586
# Read the iSCSI initiator name from /etc/iscsi/initiatorname.iscsi.
# Returns the value of the first 'InitiatorName=' line, undef if the file has
# no such line, and nothing if the file cannot be opened.
sub get_initiator_name {
    my $fh = IO::File->new('/etc/iscsi/initiatorname.iscsi') || return;

    my $initiator;
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\s*InitiatorName\s*=\s*([\.\-:\w]+)/) {
            $initiator = $1;
            last;
        }
    }
    $fh->close();

    return $initiator;
}
1600
# Build the value of the QEMU '-drive' option for a drive.
#
# Parameters:
#   $storecfg - storage configuration, passed on to the PVE::Storage helpers
#   $vmid     - VM ID, only passed on to get_iso_path()
#   $drive    - parsed drive hash (file, format, interface, index, cache, ...)
#   $pbs_name - if set, name used as 'backing' node; the drive is then set up
#               with format=alloc-track on top of it (not allowed for cdroms)
#   $io_uring - if true, io_uring is used as default aio mode where possible
#
# Returns a string of the form '[file=<path>,]if=none,id=drive-<id><options>'.
sub print_drive_commandline_full {
    my ($storecfg, $vmid, $drive, $pbs_name, $io_uring) = @_;

    my $path;
    my $volid = $drive->{file};
    my $format = $drive->{format};
    my $drive_id = get_drive_id($drive);

    # $storeid stays undefined if $volid is not a volume ID (second argument: no error)
    my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
    my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;

    if (drive_is_cdrom($drive)) {
        $path = get_iso_path($storecfg, $vmid, $volid);
        die "$drive_id: cannot back cdrom drive with PBS snapshot\n" if $pbs_name;
    } else {
        if ($storeid) {
            $path = PVE::Storage::path($storecfg, $volid);
            $format //= qemu_img_format($scfg, $volname);
        } else {
            # not a storage volume: use the value as path and default to raw
            $path = $volid;
            $format //= "raw";
        }
    }

    my $is_rbd = $path =~ m/^rbd:/;

    # options that are passed through unchanged when set
    my $opts = '';
    my @qemu_drive_options = qw(heads secs cyls trans media cache rerror werror aio discard);
    foreach my $o (@qemu_drive_options) {
        $opts .= ",$o=$drive->{$o}" if defined($drive->{$o});
    }

    # snapshot only accepts on|off
    if (defined($drive->{snapshot})) {
        my $v = $drive->{snapshot} ? 'on' : 'off';
        $opts .= ",snapshot=$v";
    }

    if (defined($drive->{ro})) { # ro maps to QEMUs `readonly`, which accepts `on` or `off` only
        $opts .= ",readonly=" . ($drive->{ro} ? 'on' : 'off');
    }

    # bandwidth and IOPS limits, once each for total, read and write; the
    # mbps* values are converted to bytes per second
    foreach my $type (['', '-total'], [_rd => '-read'], [_wr => '-write']) {
        my ($dir, $qmpname) = @$type;
        if (my $v = $drive->{"mbps$dir"}) {
            $opts .= ",throttling.bps$qmpname=".int($v*1024*1024);
        }
        if (my $v = $drive->{"mbps${dir}_max"}) {
            $opts .= ",throttling.bps$qmpname-max=".int($v*1024*1024);
        }
        if (my $v = $drive->{"bps${dir}_max_length"}) {
            $opts .= ",throttling.bps$qmpname-max-length=$v";
        }
        if (my $v = $drive->{"iops${dir}"}) {
            $opts .= ",throttling.iops$qmpname=$v";
        }
        if (my $v = $drive->{"iops${dir}_max"}) {
            $opts .= ",throttling.iops$qmpname-max=$v";
        }
        if (my $v = $drive->{"iops${dir}_max_length"}) {
            $opts .= ",throttling.iops$qmpname-max-length=$v";
        }
    }

    if ($pbs_name) {
        # with a PBS backing node the real format moves to 'file.driver' and
        # therefore has to be known
        $format = "rbd" if $is_rbd;
        die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n"
            if !$format;
        $opts .= ",format=alloc-track,file.driver=$format";
    } elsif ($format) {
        $opts .= ",format=$format";
    }

    my $cache_direct = 0;

    if (my $cache = $drive->{cache}) {
        $cache_direct = $cache =~ /^(?:off|none|directsync)$/;
    } elsif (!drive_is_cdrom($drive) && !($scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow})) {
        # default for non-cdrom drives, except on btrfs storages without 'nocow'
        $opts .= ",cache=none";
        $cache_direct = 1;
    }

    # io_uring with cache mode writeback or writethrough on krbd will hang...
    my $rbd_no_io_uring = $scfg && $scfg->{type} eq 'rbd' && $scfg->{krbd} && !$cache_direct;

    # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
    # sometimes, just plain disable...
    my $lvm_no_io_uring = $scfg && $scfg->{type} eq 'lvm';

    # io_uring causes problems when used with CIFS since kernel 5.15
    # Some discussion: https://www.spinics.net/lists/linux-cifs/msg26734.html
    my $cifs_no_io_uring = $scfg && $scfg->{type} eq 'cifs';

    # choose a default aio mode only if none was configured explicitly
    if (!$drive->{aio}) {
        if ($io_uring && !$rbd_no_io_uring && !$lvm_no_io_uring && !$cifs_no_io_uring) {
            # io_uring supports all cache modes
            $opts .= ",aio=io_uring";
        } else {
            # aio native works only with O_DIRECT
            if($cache_direct) {
                $opts .= ",aio=native";
            } else {
                $opts .= ",aio=threads";
            }
        }
    }

    if (!drive_is_cdrom($drive)) {
        my $detectzeroes;
        if (defined($drive->{detect_zeroes}) && !$drive->{detect_zeroes}) {
            $detectzeroes = 'off';
        } elsif ($drive->{discard}) {
            $detectzeroes = $drive->{discard} eq 'on' ? 'unmap' : 'on';
        } else {
            # This used to be our default with discard not being specified:
            $detectzeroes = 'on';
        }

        # note: 'detect-zeroes' works per blockdev and we want it to persist
        # after the alloc-track is removed, so put it on 'file' directly
        my $dz_param = $pbs_name ? "file.detect-zeroes" : "detect-zeroes";
        $opts .= ",$dz_param=$detectzeroes" if $detectzeroes;
    }

    if ($pbs_name) {
        $opts .= ",backing=$pbs_name";
        $opts .= ",auto-remove=on";
    }

    # my $file_param = $pbs_name ? "file.file.filename" : "file";
    my $file_param = "file";
    if ($pbs_name) {
        # non-rbd drivers require the underlying file to be a seperate block
        # node, so add a second .file indirection
        $file_param .= ".file" if !$is_rbd;
        $file_param .= ".filename";
    }
    # an empty path (e.g. cdrom 'none') produces no file parameter at all
    my $pathinfo = $path ? "$file_param=$path," : '';

    return "${pathinfo}if=none,id=drive-$drive->{interface}$drive->{index}$opts";
}
1742
# Build the value of a QEMU '-blockdev' option for a read-only 'pbs' node
# named $pbs_name. repository, snapshot and archive are always emitted;
# namespace and keyfile only when set in $pbs_conf.
sub print_pbs_blockdev {
    my ($pbs_conf, $pbs_name) = @_;

    my @parts = (
        "driver=pbs,node-name=$pbs_name,read-only=on",
        ",repository=$pbs_conf->{repository}",
    );
    push @parts, ",namespace=$pbs_conf->{namespace}" if $pbs_conf->{namespace};
    push @parts, ",snapshot=$pbs_conf->{snapshot}";
    push @parts, ",archive=$pbs_conf->{archive}";
    push @parts, ",keyfile=$pbs_conf->{keyfile}" if $pbs_conf->{keyfile};

    return join('', @parts);
}
1753
# Build the value of the QEMU '-device' option for a network device.
#
# The 'virtio' model is mapped to 'virtio-net-pci', every other model name is
# used as device name directly. Multiqueue, queue size and host_mtu settings
# are only emitted for virtio. With $use_old_bios_files a model specific PXE
# ROM file is selected explicitly.
sub print_netdevice_full {
    my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version) = @_;

    my $is_virtio = $net->{model} eq 'virtio';
    my $device = $is_virtio ? 'virtio-net-pci' : $net->{model};

    my $pciaddr = print_pci_addr("$netid", $bridges, $arch, $machine_type);
    my $tmpstr = "$device,mac=$net->{macaddr},netdev=$netid$pciaddr,id=$netid";

    if ($net->{queues} && $net->{queues} > 1 && $is_virtio) {
        # Consider we have N queues, the number of vectors needed is 2 * N + 2, i.e., one per in
        # and out of each queue plus one config interrupt and control vector queue
        my $vectors = $net->{queues} * 2 + 2;
        $tmpstr .= ",vectors=$vectors,mq=on";
        $tmpstr .= ",packed=on" if min_version($machine_version, 7, 1);
    }

    if (min_version($machine_version, 7, 1) && $is_virtio) {
        $tmpstr .= ",rx_queue_size=1024,tx_queue_size=1024";
    }

    if ($net->{bootindex}) {
        $tmpstr .= ",bootindex=$net->{bootindex}";
    }

    if (my $mtu = $net->{mtu}) {
        if ($is_virtio && $net->{bridge}) {
            my $bridge_mtu = PVE::Network::read_bridge_mtu($net->{bridge});
            if ($mtu == 1) {
                # the special value 1 selects the MTU of the bridge
                $mtu = $bridge_mtu;
            } else {
                die "netdev $netid: MTU '$mtu' is smaller than the IP minimum MTU '576'\n"
                    if $mtu < 576;
                die "netdev $netid: MTU '$mtu' is bigger than the bridge MTU '$bridge_mtu'\n"
                    if $mtu > $bridge_mtu;
            }
            $tmpstr .= ",host_mtu=$mtu";
        } else {
            warn "WARN: netdev $netid: ignoring MTU '$mtu', not using VirtIO or no bridge configured.\n";
        }
    }

    if ($use_old_bios_files) {
        my %pxe_rom_for = (
            'virtio-net-pci' => 'pxe-virtio.rom',
            'e1000' => 'pxe-e1000.rom',
            'e1000e' => 'pxe-e1000e.rom',
            'ne2k' => 'pxe-ne2k_pci.rom',
            'pcnet' => 'pxe-pcnet.rom',
            'rtl8139' => 'pxe-rtl8139.rom',
        );
        my $romfile = $pxe_rom_for{$device};
        $tmpstr .= ",romfile=$romfile" if $romfile;
    }

    return $tmpstr;
}
1816
# Build the value of the QEMU '-netdev' option for the network device $netid.
#
# $netid must have the form 'net<N>' with N below $MAX_NETS. With a bridge
# configured a tap device named 'tap<vmid>i<N>' is used together with the
# pve-bridge scripts (the hotplug variant when $hotplug is set), otherwise
# user mode networking. vhost and queues are only enabled for virtio.
# Dies on an invalid $netid or if the tap interface name gets too long.
sub print_netdev_full {
    my ($vmid, $conf, $arch, $net, $netid, $hotplug) = @_;

    # an ID that is not 'net<N>' would otherwise slip through the numeric range
    # check below with an empty index and produce an interface name without index
    die "got strange net id '$netid'\n" if $netid !~ m/^net(\d+)$/;
    my $i = int($1);

    die "got strange net id '$i'\n" if $i >= ${MAX_NETS};

    my $ifname = "tap${vmid}i$i";

    # kvm uses TUNSETIFF ioctl, and that limits ifname length
    die "interface name '$ifname' is too long (max 15 character)\n"
        if length($ifname) >= 16;

    my $vhostparam = '';
    if (is_native($arch)) {
        $vhostparam = ',vhost=on' if kernel_has_vhost_net() && $net->{model} eq 'virtio';
    }

    my $vmname = $conf->{name} || "vm$vmid";

    my $netdev = "";
    my $script = $hotplug ? "pve-bridge-hotplug" : "pve-bridge";

    if ($net->{bridge}) {
        $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/$script"
            .",downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam";
    } else {
        $netdev = "type=user,id=$netid,hostname=$vmname";
    }

    $netdev .= ",queues=$net->{queues}" if ($net->{queues} && $net->{model} eq 'virtio');

    return $netdev;
}
1854
# Maps the configured VGA type to the QEMU device name used by print_vga_device().
# Types without an entry here have no device name of their own.
my $vga_map = {
    'cirrus' => 'cirrus-vga',
    'std' => 'VGA',
    'vmware' => 'vmware-svga',
    'virtio' => 'virtio-vga',
    'virtio-gl' => 'virtio-vga-gl',
};
1862
# Build the value of the QEMU '-device' option for a display adapter.
#
# Parameters:
#   $conf            - VM configuration (ostype, bios and machine type are used)
#   $vga             - parsed vga option (type, memory)
#   $arch            - guest architecture
#   $machine_version - machine version, used for version dependent properties
#   $machine         - machine type, passed on to the PCI address helpers
#   $id              - display index; undef for the primary display ('vga')
#   $qxlnum          - if true, a qxl device is emitted instead of the mapped type
#   $bridges         - passed on to the PCI address helpers
#
# Dies if no device type is known for $vga->{type}, and for 'virtio-gl' if
# the required GL libraries or a DRM render node are missing on the host.
sub print_vga_device {
    my ($conf, $vga, $arch, $machine_version, $machine, $id, $qxlnum, $bridges) = @_;

    my $type = $vga_map->{$vga->{type}};
    if ($arch eq 'aarch64' && defined($type) && $type eq 'virtio-vga') {
        $type = 'virtio-gpu';
    }
    my $vgamem_mb = $vga->{memory};

    my $max_outputs = '';
    if ($qxlnum) {
        # secondary qxl displays (with an id) use 'qxl', the primary one 'qxl-vga'
        $type = $id ? 'qxl' : 'qxl-vga';

        # the alternation is grouped so that both anchors apply to each
        # alternative: exactly 'l<NN>' or exactly 'other'
        if (!$conf->{ostype} || $conf->{ostype} =~ m/^(?:l\d\d|other)$/) {
            # set max outputs so linux can have up to 4 qxl displays with one device
            if (min_version($machine_version, 4, 1)) {
                $max_outputs = ",max_outputs=4";
            }
        }
    }

    die "no devicetype for $vga->{type}\n" if !$type;

    my $memory = "";
    if ($vgamem_mb) {
        if ($vga->{type} =~ /^virtio/) {
            my $bytes = PVE::Tools::convert_size($vgamem_mb, "mb" => "b");
            $memory = ",max_hostmem=$bytes";
        } elsif ($qxlnum) {
            # from https://www.spice-space.org/multiple-monitors.html
            $memory = ",vgamem_mb=$vga->{memory}";
            my $ram = $vgamem_mb * 4;
            my $vram = $vgamem_mb * 2;
            $memory .= ",ram_size_mb=$ram,vram_size_mb=$vram";
        } else {
            $memory = ",vgamem_mb=$vga->{memory}";
        }
    } elsif ($qxlnum && $id) {
        $memory = ",ram_size=67108864,vram_size=33554432";
    }

    # EDID is switched off for the standard VGA device on Windows guests
    # unless OVMF is used as BIOS
    my $edidoff = "";
    if ($type eq 'VGA' && windows_version($conf->{ostype})) {
        $edidoff=",edid=off" if (!defined($conf->{bios}) || $conf->{bios} ne 'ovmf');
    }

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $vgaid = "vga" . ($id // '');
    my $pciaddr;
    if ($q35 && $vgaid eq 'vga') {
        # the first display uses pcie.0 bus on q35 machines
        $pciaddr = print_pcie_addr($vgaid, $bridges, $arch, $machine);
    } else {
        $pciaddr = print_pci_addr($vgaid, $bridges, $arch, $machine);
    }

    if ($vga->{type} eq 'virtio-gl') {
        my $base = '/usr/lib/x86_64-linux-gnu/lib';
        die "missing libraries for '$vga->{type}' detected! Please install 'libgl1' and 'libegl1'\n"
            if !-e "${base}EGL.so.1" || !-e "${base}GL.so.1";

        die "no DRM render node detected (/dev/dri/renderD*), no GPU? - needed for '$vga->{type}' display\n"
            if !PVE::Tools::dir_glob_regex('/dev/dri/', "renderD.*");
    }

    return "$type,id=${vgaid}${memory}${max_outputs}${pciaddr}${edidoff}";
}
1930
# Parse a ';' separated list of numbers and ranges ('0-3;5') into a reference
# to a list of [ start, end ] pairs; end is undef for a single number.
# Dies on malformed parts and on ranges whose end is smaller than the start.
sub parse_number_sets {
    my ($set) = @_;

    my @ranges;
    for my $part (split(/;/, $set)) {
        my @bounds = $part =~ /^\s*(\d+)(?:-(\d+))?\s*$/;
        die "invalid range: $part\n" if !@bounds;

        my ($start, $end) = @bounds;
        die "invalid range: $part ($end < $start)\n" if defined($end) && $end < $start;

        push @ranges, [ $start, $end ];
    }

    return \@ranges;
}
1944
# Parse a 'numa<N>' property string according to $numa_fmt. The 'cpus' and
# 'hostnodes' values, when present, are additionally expanded with
# parse_number_sets().
sub parse_numa {
    my ($data) = @_;

    my $res = parse_property_string($numa_fmt, $data);

    for my $key ('cpus', 'hostnodes') {
        next if !defined($res->{$key});
        $res->{$key} = parse_number_sets($res->{$key});
    }

    return $res;
}
1953
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
#
# Parse a 'net<N>' property string according to $net_fmt. A parse error is
# only warned about and nothing is returned. If no MAC address is given, a
# random one with the prefix from datacenter.cfg is generated, unless
# $disable_mac_autogen is set.
sub parse_net {
    my ($data, $disable_mac_autogen) = @_;

    my $res = eval { parse_property_string($net_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    my $needs_mac = !defined($res->{macaddr}) && !$disable_mac_autogen;
    if ($needs_mac) {
        my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
        $res->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix});
    }

    return $res;
}
1969
# ipconfigX ip=cidr,gw=ip,ip6=cidr,gw6=ip
#
# Parse an 'ipconfig<N>' property string according to $ipconfig_fmt.
# Parse errors and inconsistent gateway settings are only warned about and
# nothing is returned. If neither an IPv4 nor an IPv6 address is set, both are
# reported as 'dhcp'.
sub parse_ipconfig {
    my ($data) = @_;

    my $res = eval { parse_property_string($ipconfig_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    my ($ip, $gw, $ip6, $gw6) = $res->@{qw(ip gw ip6 gw6)};

    if ($gw) {
        if (!$ip) {
            warn 'gateway specified without specifying an IP address';
            return;
        }
    }
    if ($gw6) {
        if (!$ip6) {
            warn 'IPv6 gateway specified without specifying an IPv6 address';
            return;
        }
    }
    if ($gw) {
        if ($ip eq 'dhcp') {
            warn 'gateway specified together with DHCP';
            return;
        }
    }
    if ($gw6) {
        if ($ip6 !~ /^$IPV6RE/) {
            # gw6 + auto/dhcp
            warn "IPv6 gateway specified together with $res->{ip6} address";
            return;
        }
    }

    return { ip => 'dhcp', ip6 => 'dhcp' } if !$ip && !$ip6;

    return $res;
}
2004
# Render a parsed network device hash back into a 'net<N>' property string.
sub print_net {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $net_fmt);
}
2010
# Re-serialize every 'netN' entry of $settings in place after running it
# through parse_net(), which fills in a MAC address where none is given.
# Entries that fail to parse are left untouched.
sub add_random_macs {
    my ($settings) = @_;

    for my $key (grep { m/^net(\d+)$/ } keys %$settings) {
        my $net = parse_net($settings->{$key}) or next;
        $settings->{$key} = print_net($net);
    }
}
2021
# Return 1 if the storage layer reports $vmid as the owner of $volid.
#
# Absolute paths (anything starting with '/') are never considered owned.
# Errors from PVE::Storage::path() are swallowed and treated as "not owned".
# In every non-owner case the sub returns nothing.
sub vm_is_volid_owner {
    my ($storecfg, $vmid, $volid) = @_;

    return if $volid =~ m|^/|;

    my $owner;
    eval { (undef, $owner) = PVE::Storage::path($storecfg, $volid); };

    return 1 if $owner && $owner == $vmid;
    return;
}
2035
# Handle a drive that is no longer referenced by the VM configuration.
#
# - cloud-init drives: the volume is freed (errors only warn) and the
#   'cloudinit' entry is removed from $conf
# - CD-ROM drives: ignored
# - anything else: registered as unused volume in $conf, but only if the
#   volume is owned by $vmid
sub vmconfig_register_unused_drive {
    my ($storecfg, $vmid, $conf, $drive) = @_;

    if (drive_is_cloudinit($drive)) {
        eval { PVE::Storage::vdisk_free($storecfg, $drive->{file}) };
        if (my $err = $@) {
            warn $err;
        }
        delete $conf->{cloudinit};
    } elsif (!drive_is_cdrom($drive)) {
        my $volid = $drive->{file};
        PVE::QemuConfig->add_unused_volume($conf, $volid, $vmid)
            if vm_is_volid_owner($storecfg, $vmid, $volid);
    }
}
2050
# smbios: [manufacturer=str][,product=str][,version=str][,serial=str][,uuid=uuid][,sku=str][,family=str][,base64=bool]
my $smbios1_fmt = do {
    # All text properties share the same schema and only differ in their
    # description, so build them from a single template.
    my $text_field = sub {
        my ($description) = @_;
        return {
            type => 'string',
            pattern => '[A-Za-z0-9+\/]+={0,2}',
            format_description => 'Base64 encoded string',
            description => $description,
            optional => 1,
        };
    };

    my $fmt = {
        uuid => {
            type => 'string',
            pattern => '[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}',
            format_description => 'UUID',
            description => "Set SMBIOS1 UUID.",
            optional => 1,
        },
        version => $text_field->("Set SMBIOS1 version."),
        serial => $text_field->("Set SMBIOS1 serial number."),
        manufacturer => $text_field->("Set SMBIOS1 manufacturer."),
        product => $text_field->("Set SMBIOS1 product ID."),
        sku => $text_field->("Set SMBIOS1 SKU string."),
        family => $text_field->("Set SMBIOS1 family string."),
        base64 => {
            type => 'boolean',
            description => 'Flag to indicate that the SMBIOS values are base64 encoded',
            optional => 1,
        },
    };

    $fmt;
};
2108
# Parse an 'smbios1' property string according to $smbios1_fmt.
# A parse error is only warned about; in that case undef is returned.
sub parse_smbios1 {
    my ($data) = @_;

    my $smbios1 = eval { parse_property_string($smbios1_fmt, $data) };
    if (my $err = $@) {
        warn $err;
    }

    return $smbios1;
}
2116
# Serialize a parsed SMBIOS1 hash back into its property string form.
sub print_smbios1 {
    my ($smbios1) = @_;

    my $property_string = PVE::JSONSchema::print_property_string($smbios1, $smbios1_fmt);
    return $property_string;
}

# make the SMBIOS1 schema available under the 'pve-qm-smbios1' format name
PVE::JSONSchema::register_format('pve-qm-smbios1', $smbios1_fmt);
2123
# Parse a watchdog property string according to $watchdog_fmt.
# Returns nothing for a false $value; parse errors are only warned about.
sub parse_watchdog {
    my ($value) = @_;

    return unless $value;

    my $watchdog = eval { parse_property_string($watchdog_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $watchdog;
}
2133
# Parse the 'agent' property of a VM config according to $agent_fmt.
#
# Returns an empty hash if the property is unset, cannot be parsed (a warning
# is emitted in that case) or does not have 'enabled' set.
sub parse_guest_agent {
    my ($conf) = @_;

    my $agent_str = $conf->{agent};
    return {} if !defined($agent_str);

    my $agent = eval { parse_property_string($agent_fmt, $agent_str) };
    if (my $err = $@) {
        warn $err;
    }

    # a disabled agent hides all other properties that might be set
    return $agent->{enabled} ? $agent : {};
}
2146
# Look up a single property of the parsed guest agent configuration.
# Returns undef when the VM config has no 'agent' entry at all.
sub get_qga_key {
    my ($conf, $key) = @_;

    return undef if !defined($conf->{agent});

    return parse_guest_agent($conf)->{$key};
}
2154
# Parse a 'vga' property string according to $vga_fmt.
# A false $value yields an empty hash; parse errors are only warned about.
sub parse_vga {
    my ($value) = @_;

    return {} unless $value;

    my $vga = eval { parse_property_string($vga_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $vga;
}
2163
# Parse an RNG property string according to $rng_fmt.
# Returns nothing for a false $value; parse errors are only warned about.
sub parse_rng {
    my ($value) = @_;

    return unless $value;

    my $rng = eval { parse_property_string($rng_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $rng;
}
2173
# Parse a 'meta' property string according to $meta_info_fmt.
# Returns nothing for a false $value; parse errors are only warned about.
sub parse_meta_info {
    my ($value) = @_;

    return unless $value;

    my $meta = eval { parse_property_string($meta_info_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $meta;
}
2183
# Build the 'meta' property string for a newly created VM: it records the
# value of kvm_user_version() and the current time (integer seconds, stored
# as string).
sub new_meta_info_string {
    my () = @_; # for now do not allow to override any value

    my $meta = {
        'creation-qemu' => kvm_user_version(),
        ctime => "". int(time()),
    };

    return PVE::JSONSchema::print_property_string($meta, $meta_info_fmt);
}
2195
# Return extra QEMU command line arguments (array ref) needed for VMs that
# were affected by a machine version transition, or nothing if no fixup
# applies.
#
# This covers VMs that always use the latest machine version but had a machine
# version transition happen that affected HW such that, e.g., an OS config
# change would be required (we do not want to pin the machine version for
# non-windows OS types).
sub qemu_created_version_fixups {
    my ($conf, $forcemachine, $kvmver) = @_;

    my $meta = parse_meta_info($conf->{meta}) // {};
    my $forced_vers = PVE::QemuServer::Machine::extract_version($forcemachine);

    # only non-versioned machine types are affected
    my $machine = $conf->{machine};
    return if defined($machine) && $machine !~ m/^(?:pc|q35|virt)$/;

    # only VMs created before 6.1 (or without creation info) are affected
    my $created_with = $meta->{'creation-qemu'};
    return if defined($created_with) && min_version($created_with, 6, 1);

    # handle snapshot-rollback/migrations
    return if $forced_vers && !min_version($forced_vers, 6, 1);

    # only need to apply the change since 6.1
    return if !min_version($kvmver, 6, 1);

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    return if !$q35;
    return if !$conf->{ostype} || $conf->{ostype} ne 'l26';

    # this changed to default-on in Q 6.1 for q35 machines, it will mess with PCI slot view
    # and thus with the predictable interface naming of systemd
    return ['-global', 'ICH9-LPC.acpi-pci-hotplug-with-bridge-support=off'];
}
2220
PVE::JSONSchema::register_format('pve-qm-usb-device', \&verify_usb_device);
# Format verifier for 'pve-qm-usb-device'.
#
# Returns $value if parse_usb_device() accepts it. Otherwise dies, unless
# $noerr is set, in which case nothing is returned.
sub verify_usb_device {
    my ($value, $noerr) = @_;

    if (parse_usb_device($value)) {
        return $value;
    }

    die "unable to parse usb device\n" if !$noerr;

    return;
}
2231
# add JSON properties for create and set function
#
# Copies every entry of $confdesc into $prop, except for a fixed set of keys
# that are skipped. With $with_disk_alloc set, drive options take their schema
# from $PVE::QemuServer::Drive::drivedesc_hash_with_alloc instead.
# Returns $prop.
sub json_config_properties {
    my ($prop, $with_disk_alloc) = @_;

    my %skipped = map { $_ => 1 } qw(
        parent
        snaptime
        vmstate
        runningmachine
        runningcpu
        meta
    );

    for my $opt (keys %$confdesc) {
        next if $skipped{$opt};

        my $use_alloc_schema = $with_disk_alloc && is_valid_drivename($opt);
        $prop->{$opt} = $use_alloc_schema
            ? $PVE::QemuServer::Drive::drivedesc_hash_with_alloc->{$opt}
            : $confdesc->{$opt};
    }

    return $prop;
}
2257
# Properties that we can read from an OVF file
#
# Returns a hash ref with one optional string property per valid drive name,
# plus optional 'cores', 'memory' and 'name' properties.
sub json_ovf_properties {
    my %prop;

    for my $device (PVE::QemuServer::Drive::valid_drive_names()) {
        $prop{$device} = {
            type => 'string',
            format => 'pve-volume-id-or-absolute-path',
            description => "Disk image that gets imported to $device",
            optional => 1,
        };
    }

    @prop{qw(cores memory name)} = (
        {
            type => 'integer',
            description => "The number of CPU cores.",
            optional => 1,
        },
        {
            type => 'integer',
            description => "Amount of RAM for the VM in MB.",
            optional => 1,
        },
        {
            type => 'string',
            description => "Name of the VM.",
            optional => 1,
        },
    );

    return \%prop;
}
2289
# return copy of $confdesc_cloudinit to generate documentation
#
# The copy is a deep clone, so callers may modify it freely.
sub cloudinit_config_properties {
    my $copy = dclone($confdesc_cloudinit);
    return $copy;
}
2295
# Return a lookup hash (property name => 1) containing every key of
# $confdesc_cloudinit, 'name', and 'net0' .. 'net<$MAX_NETS-1>'.
sub cloudinit_pending_properties {
    my $p = {
        # The parentheses are required: 'keys' is a named unary operator, so
        # without them the following "name => 1" pair becomes part of map's
        # input list and the result gains a bogus '1' key.
        (map { $_ => 1 } keys $confdesc_cloudinit->%*),
        name => 1,
    };
    $p->{"net$_"} = 1 for 0..($MAX_NETS-1);
    return $p;
}
2304
# Validate $value against the type declared for $key in $confdesc and return
# the normalized value.
#
# - boolean: 1/on/yes/true => 1, 0/off/no/false => 0 (case-insensitive words)
# - integer: unsigned decimal digits only, returned via int()
# - number:  unsigned decimal with optional fraction, returned unchanged
# - string:  checked with PVE::JSONSchema::check_format() if the option
#            declares a format; otherwise one pair of enclosing double quotes
#            is stripped
#
# Dies for unknown keys, undefined values, values containing CR/LF and values
# failing the type check.
sub check_type {
    my ($key, $value) = @_;

    my $desc = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$desc;

    my $type = $desc->{type};

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    die "internal error" if $type ne 'string';

    if (my $fmt = $desc->{format}) {
        PVE::JSONSchema::check_format($fmt, $value);
        return $value;
    }

    $value =~ s/^\"(.*)\"$/$1/;
    return $value;
}
2341
# Destroy a VM: free the volumes it owns and remove (or replace) its config.
#
# $skiplock           - do not check the config lock
# $replacement_conf   - if defined, written as new config instead of
#                       destroying the config file
# $purge_unreferenced - additionally free every 'images' volume that
#                       PVE::Storage::vdisk_list() reports for $vmid
#
# Dies if the VM is a template with a base volume that is still in use.
# Failures to free individual volumes only produce warnings.
sub destroy_vm {
    my ($storecfg, $vmid, $skiplock, $replacement_conf, $purge_unreferenced) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    # the lock check is skipped for configs holding the 'suspended' lock
    PVE::QemuConfig->check_lock($conf)
        if !($skiplock || PVE::QemuConfig->has_lock($conf, 'suspended'));

    if ($conf->{template}) {
        # check if any base image is still used by a linked clone
        my $assert_base_unused = sub {
            my ($ds, $drive) = @_;
            return if drive_is_cdrom($drive);

            my $volid = $drive->{file};
            return if !$volid || $volid =~ m|^/|;

            if (PVE::Storage::volume_is_base_and_used($storecfg, $volid)) {
                die "base volume '$volid' is still in use by linked cloned\n";
            }
        };
        PVE::QemuConfig->foreach_volume_full($conf, { include_unused => 1 }, $assert_base_unused);
    }

    # volumes already handled, so that each one is only freed once
    my $freed = {};
    my $remove_owned_drive = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/| || $freed->{$volid};

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        $freed->{$volid} = 1;
        eval { PVE::Storage::vdisk_free($storecfg, $volid) };
        warn "Could not remove disk '$volid', check manually: $@" if $@;
    };

    # only remove disks owned by this VM (referenced in the config)
    my $include_opts = {
        include_unused => 1,
        extra_keys => ['vmstate'],
    };
    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $remove_owned_drive);

    # saved VM state volumes of snapshots
    for my $snap (values %{$conf->{snapshots}}) {
        next if !defined($snap->{vmstate});
        my $drive = PVE::QemuConfig->parse_volume('vmstate', $snap->{vmstate}, 1);
        $remove_owned_drive->('vmstate', $drive) if defined($drive);
    }

    PVE::QemuConfig->foreach_volume_full($conf->{pending}, $include_opts, $remove_owned_drive);

    if ($purge_unreferenced) { # also remove unreferenced disk
        my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid, undef, 'images');
        my $free_volume = sub {
            my ($volid, $sid, $volname, $d) = @_;
            eval { PVE::Storage::vdisk_free($storecfg, $volid) };
            warn $@ if $@;
        };
        PVE::Storage::foreach_volid($vmdisks, $free_volume);
    }

    if (defined $replacement_conf) {
        PVE::QemuConfig->write_config($vmid, $replacement_conf);
    } else {
        PVE::QemuConfig->destroy_config($vmid);
    }
}
2414
# Parse the raw content of a VM configuration file.
#
# $filename - must end in '/qemu-server/<vmid>.conf'; the VMID used in
#             messages is taken from it
# $raw      - file content; nothing is returned if it is undefined
# $strict   - die on invalid lines/values instead of only warning
#
# Returns a hash ref holding the options of the main section plus
#   digest    - SHA1 hex digest of $raw
#   pending   - options of the [PENDING] section
#   cloudinit - options of the [special:cloudinit] section (not validated)
#   snapshots - one hash per [<snapname>] section
# Comment lines and 'description:' lines are collected into the 'description'
# of the section they appear in.
sub parse_vm_config {
    my ($filename, $raw, $strict) = @_;

    return if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
        pending => {},
        cloudinit => {},
    };

    my $handle_error = sub {
        my ($msg) = @_;

        if ($strict) {
            die $msg;
        } else {
            warn $msg;
        }
    };

    $filename =~ m|/qemu-server/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    # $conf always points at the hash of the section currently being parsed
    my $conf = $res;
    my $descr;
    # store the description collected so far in the current section
    my $finish_description = sub {
        if (defined($descr)) {
            $descr =~ s/\s+$//;
            $conf->{description} = $descr;
        }
        $descr = undef;
    };
    my $section = '';

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        # section headers: finish the previous section before switching $conf
        if ($line =~ m/^\[PENDING\]\s*$/i) {
            $section = 'pending';
            $finish_description->();
            $conf = $res->{$section} = {};
            next;
        } elsif ($line =~ m/^\[special:cloudinit\]\s*$/i) {
            $section = 'cloudinit';
            $finish_description->();
            $conf = $res->{$section} = {};
            next;

        } elsif ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            $section = $1;
            $finish_description->();
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # Anchored at the start of the line: otherwise any other option
            # whose value happens to end in "snapstate: prepare|delete"
            # (e.g. an 'args' line) would be taken for a snapstate entry.
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            # 'args' is stored as-is, without type check
            my $key = $1;
            my $value = $2;
            $conf->{$key} = $value;
        } elsif ($line =~ m/^delete:\s*(.*\S)\s*$/) {
            my $value = $1;
            if ($section eq 'pending') {
                $conf->{delete} = $value; # we parse this later
            } else {
                $handle_error->("vm $vmid - property 'delete' is only allowed in [PENDING]\n");
            }
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(.+?)\s*$/) {
            my $key = $1;
            my $value = $2;
            if ($section eq 'cloudinit') {
                # ignore validation only used for informative purpose
                $conf->{$key} = $value;
                next;
            }
            eval { $value = check_type($key, $value); };
            if ($@) {
                $handle_error->("vm $vmid - unable to parse value of '$key' - $@");
            } else {
                # 'cdrom' is stored under 'ide2'
                $key = 'ide2' if $key eq 'cdrom';
                my $fmt = $confdesc->{$key}->{format};
                if ($fmt && $fmt =~ /^pve-qm-(?:ide|scsi|virtio|sata)$/) {
                    # replace the drive's file by the volume ID that
                    # filename_to_volume_id() returns for it
                    my $v = parse_drive($key, $value);
                    if (my $volid = filename_to_volume_id($vmid, $v->{file}, $v->{media})) {
                        $v->{file} = $volid;
                        $value = print_drive($v);
                    } else {
                        $handle_error->("vm $vmid - unable to parse value of '$key'\n");
                        next;
                    }
                }

                $conf->{$key} = $value;
            }
        } else {
            $handle_error->("vm $vmid - unable to parse config: $line\n");
        }
    }

    $finish_description->();
    delete $res->{snapstate}; # just to be sure

    return $res;
}
2534
# Serialize a VM configuration hash into the raw config file format.
#
# $conf is normalized in place first: 'cdrom' is moved to 'ide2', the legacy
# 'smp' option is folded into 'sockets', all values are passed through
# check_type(), and 'unusedN' entries whose volume is referenced by a drive of
# the main or [PENDING] section are dropped.
#
# Returns the raw text: main section, then [PENDING], [special:cloudinit] and
# the snapshot sections sorted by name. Dies on invalid values.
sub write_vm_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    if (my $cdrom = $conf->{cdrom}) {
        die "option ide2 conflicts with cdrom\n" if $conf->{ide2};
        $conf->{ide2} = $cdrom;
        delete $conf->{cdrom};
    }

    # we do not use 'smp' any longer
    if ($conf->{sockets}) {
        delete $conf->{smp};
    } elsif ($conf->{smp}) {
        $conf->{sockets} = $conf->{smp};
        delete $conf->{cores};
        delete $conf->{smp};
    }

    # volume IDs referenced by drives outside of snapshots
    my $used_volids = {};

    # keys that are no regular options and thus never type checked
    my %structural = map { $_ => 1 } qw(digest description snapshots snapstate pending cloudinit);

    # type check and normalize all options of one section in place
    my $cleanup_config = sub {
        my ($section, $is_pending, $snapname) = @_;

        for my $key (keys %$section) {
            next if $structural{$key};

            if ($key eq 'delete') {
                die "propertry 'delete' is only allowed in [PENDING]\n"
                    if !$is_pending;
                # fixme: check syntax?
                next;
            }

            my $value = eval { check_type($key, $section->{$key}) };
            die "unable to parse value of '$key' - $@" if $@;

            $section->{$key} = $value;

            next if $snapname || !is_valid_drivename($key);

            my $drive = parse_drive($key, $value);
            $used_volids->{$drive->{file}} = 1 if $drive && $drive->{file};
        }
    };

    $cleanup_config->($conf);

    $cleanup_config->($conf->{pending}, 1);

    for my $snapname (keys %{$conf->{snapshots}}) {
        die "internal error: snapshot name '$snapname' is forbidden" if lc($snapname) eq 'pending';
        $cleanup_config->($conf->{snapshots}->{$snapname}, undef, $snapname);
    }

    # remove 'unusedX' settings if we re-add a volume
    for my $key (keys %$conf) {
        next if $key !~ m/^unused/;
        delete $conf->{$key} if $used_volids->{$conf->{$key}};
    }

    # render one section as "key: value" lines, preceded by its description
    my $generate_raw_config = sub {
        my ($section, $is_pending) = @_;

        my $text = '';

        # add description as comment to top of file
        my $descr = $section->{description};
        if (defined($descr)) {
            if ($descr) {
                $text .= '#' . PVE::Tools::encode_text($_) . "\n" for split(/\n/, $descr);
            } elsif ($is_pending) {
                # an empty description is still written out for [PENDING]
                $text .= "#\n";
            }
        }

        for my $key (sort keys %$section) {
            next if $key =~ /^(digest|description|pending|cloudinit|snapshots)$/;
            $text .= "$key: $section->{$key}\n";
        }

        return $text;
    };

    my $raw = $generate_raw_config->($conf);

    if (scalar(keys %{$conf->{pending}})){
        $raw .= "\n[PENDING]\n";
        $raw .= $generate_raw_config->($conf->{pending}, 1);
    }

    if (scalar(keys %{$conf->{cloudinit}}) && PVE::QemuConfig->has_cloudinit($conf)){
        $raw .= "\n[special:cloudinit]\n";
        $raw .= $generate_raw_config->($conf->{cloudinit});
    }

    for my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n";
        $raw .= $generate_raw_config->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
2641
# Collect the default value of every option in $confdesc that declares one.
# Returns a hash ref (option name => default).
sub load_defaults {
    my $defaults = {};

    # we use static defaults from our JSON schema configuration
    for my $key (keys %$confdesc) {
        my $default = $confdesc->{$key}->{default};
        $defaults->{$key} = $default if defined($default);
    }

    return $defaults;
}
2655
# List the QEMU guests of the cluster-wide VM list that are located on this
# node. Returns a hash ref of the form { $vmid => { exists => 1 }, ... }.
sub config_list {
    my $vmlist = PVE::Cluster::get_vmlist();
    my $res = {};
    return $res if !$vmlist || !$vmlist->{ids};

    my $ids = $vmlist->{ids};
    my $nodename = nodename();

    for my $vmid (keys %$ids) {
        my $entry = $ids->{$vmid};

        my $is_local = $entry->{node} && $entry->{node} eq $nodename;
        next if !$is_local;

        my $is_qemu = $entry->{type} && $entry->{type} eq 'qemu';
        next if !$is_qemu;

        $res->{$vmid}->{exists} = 1;
    }

    return $res;
}
2671
# test if VM uses local resources (to prevent migration)
#
# Returns an array ref with the names of all config keys referring to local
# resources. Dies if there is at least one and $noerr is not set.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    # 'hostusb' and 'hostpci' without index are the old syntax
    my @loc_res = grep { $conf->{$_} } qw(hostusb hostpci ivshmem);

    for my $key (keys %$conf) {
        next if $key !~ m/^(usb|hostpci|serial|parallel)\d+$/;

        my $value = $conf->{$key};
        # USB devices set to 'spice' are not counted as local resource
        next if $key =~ m/^usb/ && ($value =~ m/^spice(?![^,])/);
        # sockets are safe: they will be recreated on the target side post-migrate
        next if $key =~ m/^serial/ && ($value eq 'socket');

        push @loc_res, $key;
    }

    die "VM uses local resources\n" if scalar @loc_res && !$noerr;

    return \@loc_res;
}
2694
# check if used storages are available on all nodes (use by migrate)
#
# For every volume of $conf that lives on a storage, this asserts that the
# storage is enabled locally and on $node, and that the volume's content type
# is configured for the storage. Dies otherwise.
sub check_storage_availability {
    my ($storecfg, $conf, $node) = @_;

    my $check_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        # volumes without a storage ID are not checked
        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        # check if storage is available on both nodes
        my $scfg = PVE::Storage::storage_check_enabled($storecfg, $sid);
        PVE::Storage::storage_check_enabled($storecfg, $sid, $node);

        my ($vtype) = PVE::Storage::parse_volname($storecfg, $volid);

        if (!$scfg->{content}->{$vtype}) {
            die "$volid: content type '$vtype' is not available on storage '$sid'\n";
        }
    };

    PVE::QemuConfig->foreach_volume($conf, $check_volume);
}
2718
# list nodes where all VM images are available (used by has_feature API)
#
# Starts with all cluster nodes and removes, per volume, the nodes its storage
# is not available on. Returns a hash ref (node name => 1).
sub shared_nodes {
    my ($conf, $storecfg) = @_;

    my $nodehash = { map { $_ => 1 } @{ PVE::Cluster::get_nodelist() } };
    my $nodename = nodename();

    my $restrict_to_storage = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        if ($scfg->{disable}) {
            # a disabled storage is available nowhere
            $nodehash = {};
        } elsif (my $avail = $scfg->{nodes}) {
            # storage restricted to a set of nodes
            delete @$nodehash{ grep { !$avail->{$_} } keys %$nodehash };
        } elsif (!$scfg->{shared}) {
            # non-shared storage: only the local node remains
            delete @$nodehash{ grep { $_ ne $nodename } keys %$nodehash };
        }
    };

    PVE::QemuConfig->foreach_volume($conf, $restrict_to_storage);

    return $nodehash;
}
2752
# Determine, for every cluster node, which storages used by the volumes of
# $conf are not available there.
#
# Returns a hash ref keyed by node name. Nodes lacking a storage get an
# 'unavailable_storages' entry holding the sorted list of storage IDs; all
# other nodes map to an empty hash.
sub check_local_storage_availability {
    my ($conf, $storecfg) = @_;

    my $nodehash = { map { $_ => {} } @{ PVE::Cluster::get_nodelist() } };

    my $collect_unavailable = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        my @missing_on;
        if ($scfg->{disable}) {
            # a disabled storage is unavailable on every node
            @missing_on = keys %$nodehash;
        } elsif (my $avail = $scfg->{nodes}) {
            @missing_on = grep { !$avail->{$_} } keys %$nodehash;
        }

        $nodehash->{$_}->{unavailable_storages}->{$storeid} = 1 for @missing_on;
    };

    PVE::QemuConfig->foreach_volume($conf, $collect_unavailable);

    # turn the per-node lookup hashes into sorted lists
    for my $info (values %$nodehash) {
        my $unavail = $info->{unavailable_storages} or next;
        $info->{unavailable_storages} = [ sort keys %$unavail ];
    }

    return $nodehash;
}
2791
# Compat only, use assert_config_exists_on_node and vm_running_locally where possible
#
# Returns the result of PVE::QemuServer::Helpers::vm_running_locally($vmid).
# Unless $nocheck is set, the existence of the config on $node is asserted
# first.
sub check_running {
    my ($vmid, $nocheck, $node) = @_;

    # $nocheck is set when called during a migration, in which case the config
    # file might still or already reside on the *other* node
    # - because rename has already happened, and current node is source
    # - because rename hasn't happened yet, and current node is target
    # - because rename has happened, current node is target, but hasn't yet
    #   processed it yet
    if (!$nocheck) {
        PVE::QemuConfig::assert_config_exists_on_node($vmid, $node);
    }

    return PVE::QemuServer::Helpers::vm_running_locally($vmid);
}
2805
# Return the result of config_list(), extended by the 'pid' of every listed
# VM that has a '<vmid>.pid' file in the run directory and for which
# check_running() returns a PID.
sub vzlist {
    my $vzlist = config_list();

    my $dir = IO::Dir->new($PVE::QemuServer::Helpers::var_run_tmpdir);
    return $vzlist if !$dir;

    while (defined(my $entry = $dir->read)) {
        my ($vmid) = $entry =~ m/^(\d+)\.pid$/ or next;
        next if !defined($vzlist->{$vmid});

        my $pid = check_running($vmid) or next;
        $vzlist->{$vmid}->{pid} = $pid;
    }

    return $vzlist;
}
2823
# Schema of the per-VM status properties.
our $vmstatus_return_properties = {
    vmid => get_standard_option('pve-vmid'),
    status => {
        type => 'string',
        enum => ['stopped', 'running'],
        description => "QEMU process status.",
    },
    maxmem => {
        type => 'integer',
        renderer => 'bytes',
        optional => 1,
        description => "Maximum memory in bytes.",
    },
    maxdisk => {
        type => 'integer',
        renderer => 'bytes',
        optional => 1,
        description => "Root disk size in bytes.",
    },
    name => {
        type => 'string',
        optional => 1,
        description => "VM name.",
    },
    qmpstatus => {
        type => 'string',
        optional => 1,
        description => "QEMU QMP agent status.",
    },
    pid => {
        type => 'integer',
        optional => 1,
        description => "PID of running qemu process.",
    },
    uptime => {
        type => 'integer',
        renderer => 'duration',
        optional => 1,
        description => "Uptime.",
    },
    cpus => {
        type => 'number',
        optional => 1,
        description => "Maximum usable CPUs.",
    },
    lock => {
        type => 'string',
        optional => 1,
        description => "The current config lock, if any.",
    },
    tags => {
        type => 'string',
        optional => 1,
        description => "The current configured tags, if any",
    },
    'running-machine' => {
        type => 'string',
        optional => 1,
        description => "The currently running machine type (if running).",
    },
    'running-qemu' => {
        type => 'string',
        optional => 1,
        description => "The currently running QEMU version (if running).",
    },
};
2890
# CPU accounting sample of the previous vmstatus() call, keyed by PID
my $last_proc_pid_stat;

# get VM status information
# This must be fast and should not block ($full == false)
# We only query KVM using QMP if $full == true (this can be slow)
#
# $opt_vmid - restrict the result to this VM (optional)
# $full     - additionally query the running VMs via QMP
#
# Returns a hash ref keyed by VMID, each value being a hash of status values.
sub vmstatus {
    my ($opt_vmid, $full) = @_;

    my $res = {};

    my $storecfg = PVE::Storage::config();

    my $list = vzlist();
    my $defaults = load_defaults();

    my ($uptime) = PVE::ProcFSTools::read_proc_uptime(1);

    my $cpucount = $cpuinfo->{cpus} || 1;

    my $MiB = 1024*1024;

    # first pass: values that can be derived from the config alone
    for my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);

        my $conf = PVE::QemuConfig->load_config($vmid);
        my $pid = $list->{$vmid}->{pid};

        my $d = { vmid => int($vmid) };
        $d->{pid} = int($pid) if $pid;

        # fixme: better status?
        $d->{status} = $pid ? 'running' : 'stopped';

        # no disk usage info available, only the size of the boot disk (if known)
        my $size = PVE::QemuServer::Drive::bootdisk_size($storecfg, $conf);
        $d->{disk} = 0;
        $d->{maxdisk} = $size // 0;

        # sockets * cores, capped at the host CPU count; an explicit 'vcpus'
        # setting takes precedence
        my $cpus = ($conf->{sockets} || $defaults->{sockets})
            * ($conf->{cores} || $defaults->{cores});
        $cpus = $cpucount if $cpus > $cpucount;
        $cpus = $conf->{vcpus} if $conf->{vcpus};
        $d->{cpus} = $cpus;

        $d->{name} = $conf->{name} || "VM $vmid";
        $d->{maxmem} = ($conf->{memory} || $defaults->{memory}) * $MiB;

        if ($conf->{balloon}) {
            $d->{balloon_min} = $conf->{balloon} * $MiB;
            $d->{shares} = $conf->{shares} // $defaults->{shares};
        }

        # runtime values, filled in below for running VMs
        $d->{$_} = 0 for qw(uptime cpu mem netout netin diskread diskwrite);

        $d->{template} = 1 if PVE::QemuConfig->is_template($conf);

        $d->{serial} = 1 if conf_has_serial($conf);
        $d->{lock} = $conf->{lock} if $conf->{lock};
        $d->{tags} = $conf->{tags} if defined($conf->{tags});

        $res->{$vmid} = $d;
    }

    # network traffic, summed over all tap devices of a VM
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    for my $dev (keys %$netdev) {
        my ($vmid) = $dev =~ m/^tap([1-9]\d*)i/ or next;
        my $d = $res->{$vmid} or next;

        # the device's 'receive' counter is accounted as the VM's outgoing
        # traffic and 'transmit' as its incoming traffic
        my $received = $netdev->{$dev}->{receive};
        my $transmitted = $netdev->{$dev}->{transmit};

        $d->{netout} += $received;
        $d->{netin} += $transmitted;

        if ($full) {
            $d->{nics}->{$dev}->{netout} = int($received);
            $d->{nics}->{$dev}->{netin} = int($transmitted);
        }
    }

    my $ctime = gettimeofday;

    # uptime, memory and CPU usage from the process statistics
    for my $vmid (keys %$list) {
        my $d = $res->{$vmid};
        my $pid = $d->{pid};
        next if !$pid;

        my $pstat = PVE::ProcFSTools::read_proc_pid_stat($pid);
        next if !$pstat; # not running

        my $used = $pstat->{utime} + $pstat->{stime};

        $d->{uptime} = int(($uptime - $pstat->{starttime})/$cpuinfo->{user_hz});

        if ($pstat->{vsize}) {
            $d->{mem} = int(($pstat->{rss}/$pstat->{vsize})*$d->{maxmem});
        }

        my $sample = {
            time => $ctime,
            used => $used,
            cpu => 0,
        };

        my $old = $last_proc_pid_stat->{$pid};
        if (!$old) {
            # first time we see this process, no usage can be calculated yet
            $last_proc_pid_stat->{$pid} = $sample;
            next;
        }

        my $dtime = ($ctime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        if ($dtime <= 1000) {
            # interval too short, keep reporting the previous value
            $d->{cpu} = $old->{cpu};
            next;
        }

        my $dutime = $used - $old->{used};

        $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
        $sample->{cpu} = $d->{cpu};
        $last_proc_pid_stat->{$pid} = $sample;
    }

    return $res if !$full;

    my $qmpclient = PVE::QMPClient->new();

    my $ballooncb = sub {
        my ($vmid, $resp) = @_;

        my $info = $resp->{'return'};
        return if !$info->{max_mem};

        my $d = $res->{$vmid};

        # use memory assigned to VM
        $d->{maxmem} = $info->{max_mem};
        $d->{balloon} = $info->{actual};

        if (defined($info->{total_mem}) && defined($info->{free_mem})) {
            $d->{mem} = $info->{total_mem} - $info->{free_mem};
            $d->{freemem} = $info->{free_mem};
        }

        $d->{ballooninfo} = $info;
    };

    my $blockstatscb = sub {
        my ($vmid, $resp) = @_;

        my $stats_list = $resp->{'return'} || [];
        my $d = $res->{$vmid};

        my ($read_total, $write_total) = (0, 0);
        for my $blockstat (@$stats_list) {
            my $stats = $blockstat->{stats};
            $read_total = $read_total + $stats->{rd_bytes};
            $write_total = $write_total + $stats->{wr_bytes};

            # per-device stats are keyed by the device name without 'drive-'
            $blockstat->{device} =~ s/drive-//;
            $d->{blockstat}->{$blockstat->{device}} = $stats;
        }

        $d->{diskread} = $read_total;
        $d->{diskwrite} = $write_total;
    };

    my $machinecb = sub {
        my ($vmid, $resp) = @_;

        my $machines = $resp->{'return'} || [];

        $res->{$vmid}->{'running-machine'} =
            PVE::QemuServer::Machine::current_from_query_machines($machines);
    };

    my $versioncb = sub {
        my ($vmid, $resp) = @_;

        my $data = $resp->{'return'} // {};

        my $version = 'unknown';
        if (my $v = $data->{qemu}) {
            $version = $v->{major} . "." . $v->{minor} . "." . $v->{micro};
        }

        $res->{$vmid}->{'running-qemu'} = $version;
    };

    my $statuscb = sub {
        my ($vmid, $resp) = @_;

        $qmpclient->queue_cmd($vmid, $blockstatscb, 'query-blockstats');
        $qmpclient->queue_cmd($vmid, $machinecb, 'query-machines');
        $qmpclient->queue_cmd($vmid, $versioncb, 'query-version');
        # this fails if the balloon driver is not loaded, so this must be
        # the last command (following commands are aborted if this fails).
        $qmpclient->queue_cmd($vmid, $ballooncb, 'query-balloon');

        my $status = $resp->{'return'}->{status};
        if (!defined($status)) {
            warn "unable to get VM status\n";
            return;
        }

        $res->{$vmid}->{qmpstatus} = $status;
    };

    # VMs considered for the result; the running ones get queried via QMP
    my @selected = grep { !$opt_vmid || $_ eq $opt_vmid } keys %$list;

    for my $vmid (@selected) {
        next if !$res->{$vmid}->{pid}; # not running
        $qmpclient->queue_cmd($vmid, $statuscb, 'query-status');
    }

    $qmpclient->queue_execute(undef, 2);

    for my $vmid (@selected) {
        next if !$res->{$vmid}->{pid}; # not running

        # we can't use the $qmpclient since it might have already aborted on
        # 'query-balloon', but this might also fail for older versions...
        my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };
        $res->{$vmid}->{'proxmox-support'} = $qemu_support // {};
    }

    # fall back to the process status if QMP did not provide one
    for my $vmid (@selected) {
        $res->{$vmid}->{qmpstatus} ||= $res->{$vmid}->{status};
    }

    return $res;
}
3131
# Returns 1 if any of the serialN options (N from 0 up to, but excluding,
# $MAX_SERIAL_PORTS) in the given VM config holds a true value, 0 otherwise.
sub conf_has_serial {
    my ($conf) = @_;

    foreach my $port (0 .. $MAX_SERIAL_PORTS - 1) {
        return 1 if $conf->{"serial$port"};
    }

    return 0;
}
3143
# Looks up the audio$id entry of the VM config ($id defaults to 0).
#
# Returns nothing if that entry is not defined. Otherwise returns a hash with:
#   dev        - the 'device' property of the parsed entry
#   dev_id     - "audiodev$id"
#   backend    - the 'driver' property, 'spice' if the entry does not set one
#   backend_id - "<backend>-backend<id>"
sub conf_has_audio {
    my ($conf, $id) = @_;

    $id = 0 if !defined($id);

    my $raw = $conf->{"audio$id"};
    return if !defined($raw);

    my $props = parse_property_string($audio_fmt, $raw);
    my $driver = defined($props->{driver}) ? $props->{driver} : 'spice';

    return {
        dev => $props->{device},
        dev_id => "audiodev$id",
        backend => $driver,
        backend_id => "$driver-backend${id}",
    };
}
3161
# Builds the QEMU command line arguments for an audio device and its backend.
#
# $audio           - hash as returned by conf_has_audio()
# $audiopciaddr    - PCI address suffix appended to the device string
# $machine_version - machine version; if min_version() accepts it for 4.2, the
#                    devices get an explicit 'audiodev=' link to the backend
#
# Returns an array reference of arguments. Dies for an unsupported device model.
sub audio_devs {
    my ($audio, $audiopciaddr, $machine_version) = @_;

    my ($dev, $dev_id, $backend, $backend_id) =
        $audio->@{qw(dev dev_id backend backend_id)};

    my $backend_link = min_version($machine_version, 4, 2) ? ",audiodev=$backend_id" : "";

    my @args;
    if ($dev eq 'AC97') {
        @args = ('-device', "AC97,id=${dev_id}${audiopciaddr}$backend_link");
    } elsif ($dev =~ /intel\-hda$/) {
        # the HDA controller itself plus two codecs attached to its bus
        @args = (
            '-device', "$dev,id=${dev_id}${audiopciaddr}",
            '-device', "hda-micro,id=${dev_id}-codec0,bus=${dev_id}.0,cad=0$backend_link",
            '-device', "hda-duplex,id=${dev_id}-codec1,bus=${dev_id}.0,cad=1$backend_link",
        );
    } else {
        die "unkown audio device '$dev', implement me!";
    }

    return [ @args, '-audiodev', "$backend,id=$backend_id" ];
}
3187
# Returns a hash with the paths of the swtpm control socket ('socket') and the
# swtpm PID file ('pid') belonging to the given VM ID.
sub get_tpm_paths {
    my ($vmid) = @_;

    my $socket_path = "/var/run/qemu-server/$vmid.swtpm";

    return {
        socket => $socket_path,
        pid => "$socket_path.pid",
    };
}
3195
# Appends the arguments for an emulated TPM (chardev on the swtpm socket,
# tpmdev and tpm-tis device) to @$devices. Does nothing if the VM config has
# no tpmstate0 entry.
sub add_tpm_device {
    my ($vmid, $devices, $conf) = @_;

    return if !$conf->{tpmstate0};

    my $socket = get_tpm_paths($vmid)->{socket};

    push @$devices,
        "-chardev", "socket,id=tpmchar,path=$socket",
        "-tpmdev", "emulator,id=tpmdev,chardev=tpmchar",
        "-device", "tpm-tis,tpmdev=tpmdev";
}
3207
# Starts the swtpm emulator daemon for the TPM state of a VM.
#
# $storecfg  - storage configuration, used to map the state volume
# $vmid      - VM ID, determines the socket and PID file paths
# $tpmdrive  - tpmstate0 property string; nothing is done if it is not set
# $migration - if true, swtpm_setup is skipped (state comes from the remote)
#
# Returns the untainted PID of the swtpm daemon so it can be killed on error,
# or undef if the PID file does not contain a number. Dies if the PID file is
# not created in time.
sub start_swtpm {
    my ($storecfg, $vmid, $tpmdrive, $migration) = @_;

    return if !$tpmdrive;

    my $tpm = parse_drive("tpmstate0", $tpmdrive);
    my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
    my $state = $storeid
        ? PVE::Storage::map_volume($storecfg, $tpm->{file})
        : $tpm->{file};

    my $is_v2 = ($tpm->{version} // '') eq 'v2.0';

    my $paths = get_tpm_paths($vmid);

    # during migration, we will get state from remote
    if (!$migration) {
        # run swtpm_setup to create a new TPM state if it doesn't exist yet
        my $setup_cmd = [
            "swtpm_setup",
            "--tpmstate",
            "file://$state",
            "--createek",
            "--create-ek-cert",
            "--create-platform-cert",
            "--lock-nvram",
            "--config",
            "/etc/swtpm_setup.conf", # do not use XDG configs
            "--runas",
            "0", # force creation as root, error if not possible
            "--not-overwrite", # ignore existing state, do not modify
        ];

        # TPM 2.0 supports ECC crypto, use if possible
        push @$setup_cmd, "--tpm2", "--ecc" if $is_v2;

        # use the line handed to the callback instead of relying on a regex
        # capture variable leaking out of run_command's internals
        run_command($setup_cmd, outfunc => sub {
            my ($line) = @_;
            print "swtpm_setup: $line\n";
        });
    }

    my $emulator_cmd = [
        "swtpm",
        "socket",
        "--tpmstate",
        "backend-uri=file://$state,mode=0600",
        "--ctrl",
        "type=unixio,path=$paths->{socket},mode=0600",
        "--pid",
        "file=$paths->{pid}",
        "--terminate", # terminate on QEMU disconnect
        "--daemon",
    ];
    push @$emulator_cmd, "--tpm2" if $is_v2;
    run_command($emulator_cmd, outfunc => sub {
        my ($line) = @_;
        print $line;
    });

    my $tries = 100; # swtpm may take a bit to start before daemonizing, wait up to 5s for pid
    while (! -e $paths->{pid}) {
        die "failed to start swtpm: pid file '$paths->{pid}' wasn't created.\n" if --$tries == 0;
        usleep(50_000);
    }

    # Capture in list context: a failed match yields undef instead of whatever
    # an earlier, unrelated match left behind in $1.
    my ($pid) = (file_read_firstline($paths->{pid}) // '') =~ m/(\d+)/;
    return $pid;
}
3277
# Checks whether the given vga property string selects a qxl display type.
#
# Returns 0 if the parsed type is unset or not 'qxl', 'qxl2', 'qxl3' or 'qxl4'.
# Otherwise returns the digit following 'qxl', or 1 for plain 'qxl'.
sub vga_conf_has_spice {
    my ($vga) = @_;

    my $parsed = parse_vga($vga);
    my $type = $parsed->{type};

    if ($type && $type =~ m/^qxl([234])?$/) {
        return $1 || 1;
    }

    return 0;
}
3287
# True if the given architecture is the one reported by get_host_arch().
sub is_native($) {
    my ($arch) = @_;

    my $host_arch = get_host_arch();

    return $host_arch eq $arch;
}
3292
# Returns the 'arch' setting of the VM config if it is defined, and the
# architecture reported by get_host_arch() otherwise.
sub get_vm_arch {
    my ($conf) = @_;

    my $configured = $conf->{arch};
    return $configured if defined($configured);

    return get_host_arch();
}
3297
# Default QEMU machine type per architecture. Used as fallback by
# get_vm_machine() when no machine is selected, and as the machine of the
# temporary VM started by query_supported_cpu_flags().
my $default_machines = {
    x86_64 => 'pc',
    aarch64 => 'virt',
};
3302
# Extracts the leading "major.minor" part of a QEMU version string.
#
# $kvmversion - version string; if undefined, kvm_user_version() is queried
#
# Returns the "major.minor" string, or undef if the version string does not
# start with such a prefix.
sub get_installed_machine_version {
    my ($kvmversion) = @_;

    $kvmversion = kvm_user_version() if !defined($kvmversion);

    # Capture in list context instead of reading $1 afterwards: $1 keeps the
    # value of an earlier successful match when this one fails, which would
    # make us return unrelated data as the version.
    my ($version) = ($kvmversion // '') =~ m/^(\d+\.\d+)/;

    return $version;
}
3309
# Turns a generic machine type into one pinned to a concrete machine version.
#
# $base_version is the version to pin to, unless it is undefined or
# can_run_pve_machine_version() rejects it for $kvmversion; in that case the
# result of get_installed_machine_version($kvmversion) is used instead.
#
# An unset machine or 'pc' becomes 'pc-i440fx-<version>', 'q35' becomes
# 'pc-q35-<version>' and 'virt' becomes 'virt-<version>'. Any other machine
# type is returned unchanged after emitting a warning.
sub windows_get_pinned_machine_version {
    my ($machine, $base_version, $kvmversion) = @_;

    my $base_usable = defined($base_version)
        && PVE::QemuServer::Machine::can_run_pve_machine_version($base_version, $kvmversion);
    my $pin_version = $base_usable
        ? $base_version
        : get_installed_machine_version($kvmversion);

    my %pinned_prefix = (
        pc => 'pc-i440fx',
        q35 => 'pc-q35',
        virt => 'virt',
    );
    my $prefix = $pinned_prefix{ $machine || 'pc' };

    if (!defined($prefix)) {
        warn "unknown machine type '$machine', not touching that!\n";
        return $machine;
    }

    return "${prefix}-${pin_version}";
}
3331
# Determines the machine type string to use for a VM.
#
# $conf            - VM config; 'machine' and 'ostype' are read
# $forcemachine    - if true, used instead of the configured machine
# $arch            - architecture for the default machine lookup ('x86_64' if
#                    undefined)
# $add_pve_version - if true, make sure the result carries a '+pveN' suffix
# $kvmversion      - QEMU version; kvm_user_version() is queried if it is
#                    needed and undefined
sub get_vm_machine {
    my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_;

    my $machine = $forcemachine ? $forcemachine : $conf->{machine};

    my $is_generic = !$machine || $machine =~ m/^(?:pc|q35|virt)$/;
    if ($is_generic) {
        $kvmversion = kvm_user_version() if !defined($kvmversion);

        # we must pin Windows VMs without a specific version to 5.1, as 5.2 fixed a bug in ACPI
        # layout which confuses windows quite a bit and may result in various regressions..
        # see: https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg08484.html
        $machine = windows_get_pinned_machine_version($machine, '5.1', $kvmversion)
            if windows_version($conf->{ostype});

        $arch = 'x86_64' if !defined($arch);
        $machine = $default_machines->{$arch} if !$machine;

        $machine .= '+pve' . PVE::QemuServer::Machine::get_pve_version($kvmversion)
            if $add_pve_version;
    }

    return $machine if !$add_pve_version;
    return $machine if $machine =~ m/\+pve\d+?(?:\.pxe)?$/;

    # for version-pinned machines that do not include a pve-version (e.g.
    # pc-q35-4.1), we assume 0 to keep them stable in case we bump
    if ($machine =~ m/^(.*?)\.pxe$/) {
        # keep a '.pxe' suffix at the very end of the machine string
        return "$1+pve0.pxe";
    }

    return $machine . '+pve0';
}
3366
# Selects the OVMF firmware code and vars images for a VM.
#
# $arch    - architecture, used as key into $OVMF
# $efidisk - parsed efidisk drive (may be undef); 'efitype' and
#            'pre-enrolled-keys' are read
# $smm     - if true, the '4m' images are used for a 4m efidisk instead of
#            the '4m-no-smm' ones
#
# Returns the list ($ovmf_code, $ovmf_vars). Dies if the architecture or the
# selected image type is unknown, or if one of the image files does not exist.
sub get_ovmf_files($$$) {
    my ($arch, $efidisk, $smm) = @_;

    my $types = $OVMF->{$arch}
        or die "no OVMF images known for architecture '$arch'\n";

    my $type = 'default';
    if (defined($efidisk->{efitype}) && $efidisk->{efitype} eq '4m') {
        $type = $smm ? "4m" : "4m-no-smm";
        $type .= '-ms' if $efidisk->{'pre-enrolled-keys'};
    }

    # Not every architecture has to provide every image type. Fail with a
    # clear message instead of dying while dereferencing an undefined value.
    my $images = $types->{$type}
        or die "no OVMF images of type '$type' known for architecture '$arch'\n";

    my ($ovmf_code, $ovmf_vars) = $images->@*;
    die "EFI base image '$ovmf_code' not found\n" if ! -f $ovmf_code;
    die "EFI vars image '$ovmf_vars' not found\n" if ! -f $ovmf_vars;

    return ($ovmf_code, $ovmf_vars);
}
3385
# Emulator binary per architecture, used when the architecture is not native.
my $Arch2Qemu = {
    aarch64 => '/usr/bin/qemu-system-aarch64',
    x86_64 => '/usr/bin/qemu-system-x86_64',
};

# Returns the binary to run for the given architecture: '/usr/bin/kvm' if
# is_native() accepts it, the matching entry of $Arch2Qemu otherwise. Dies if
# there is no such entry.
sub get_command_for_arch($) {
    my ($arch) = @_;

    if (is_native($arch)) {
        return '/usr/bin/kvm';
    }

    my $binary = $Arch2Qemu->{$arch};
    die "don't know how to emulate architecture '$arch'\n" if !$binary;

    return $binary;
}
3398
3399 # To use query_supported_cpu_flags and query_understood_cpu_flags to get flags
3400 # to use in a QEMU command line (-cpu element), first array_intersect the result
3401 # of query_supported_ with query_understood_. This is necessary because:
3402 #
3403 # a) query_understood_ returns flags the host cannot use and
3404 # b) query_supported_ (rather the QMP call) doesn't actually return CPU
3405 # flags, but CPU settings - with most of them being flags. Those settings
3406 # (and some flags, curiously) cannot be specified as a "-cpu" argument.
3407 #
3408 # query_supported_ needs to start up to 2 temporary VMs and is therefore rather
3409 # expensive. If you need the value returned from this, you can get it much
3410 # cheaper from pmxcfs using PVE::Cluster::get_node_kv('cpuflags-$accel') with
3411 # $accel being 'kvm' or 'tcg'.
3412 #
3413 # pvestatd calls this function on startup and whenever the QEMU/KVM version
3414 # changes, automatically populating pmxcfs.
3415 #
3416 # Returns: { kvm => [ flagX, flagY, ... ], tcg => [ flag1, flag2, ... ] }
3417 # since kvm and tcg machines support different flags
3418 #
sub query_supported_cpu_flags {
    my ($arch) = @_;

    $arch = get_host_arch() if !defined($arch);
    my $default_machine = $default_machines->{$arch};

    # FIXME: Once this is merged, the code below should work for ARM as well:
    # https://lists.nongnu.org/archive/html/qemu-devel/2019-06/msg04947.html
    die "QEMU/KVM cannot detect CPU flags on ARM (aarch64)\n" if $arch eq "aarch64";

    my $kvm_supported = defined(kvm_version());
    my $qemu_cmd = get_command_for_arch($arch);
    my $fakevmid = -1;
    my $pidfile = PVE::QemuServer::Helpers::pidfile_name($fakevmid);

    # Starts a temporary (frozen) VM with vmid -1, asks it via QMP for the
    # expansion of the 'host' CPU model and stops it again. Returns the sorted
    # list of properties reported as '1'. TCG is used unless $use_kvm is true.
    my $query_flags_for_accel = sub {
        my ($use_kvm) = @_;

        my @qemu_args = (
            $qemu_cmd,
            '-machine', $default_machine,
            '-display', 'none',
            '-chardev', "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server=on,wait=off",
            '-mon', 'chardev=qmp,mode=control',
            '-pidfile', $pidfile,
            '-S', '-daemonize',
        );
        push @qemu_args, '-accel', 'tcg' if !$use_kvm;

        my $exit_code = run_command(\@qemu_args, noerr => 1, quiet => 0);
        die "QEMU flag querying VM exited with code " . $exit_code if $exit_code;

        my %enabled;
        eval {
            my $expansion = mon_cmd(
                $fakevmid,
                'query-cpu-model-expansion',
                type => 'full',
                model => { name => 'host' },
            );

            my $props = $expansion->{model}->{props};
            for my $name (keys %$props) {
                next if $props->{$name} ne '1';
                # QEMU returns some flags multiple times, with '_', '.' or '-'
                # (e.g. lahf_lm and lahf-lm; sse4.2, sse4-2 and sse4_2; ...).
                # We only keep those with underscores, to match /proc/cpuinfo
                (my $flag = $name) =~ s/[.-]/_/g;
                $enabled{$flag} = 1;
            }
        };
        my $query_err = $@;

        # force stop with 10 sec timeout and 'nocheck', always stop, even if QMP failed
        vm_stop(undef, $fakevmid, 1, 1, 10, 0, 1);

        die $query_err if $query_err;

        return [ sort keys %enabled ];
    };

    my $flags_for = {};

    # We need to query QEMU twice, since KVM and TCG have different supported flags
    PVE::QemuConfig->lock_config($fakevmid, sub {
        my @accels = $kvm_supported ? ('tcg', 'kvm') : ('tcg');
        for my $accel (@accels) {
            $flags_for->{$accel} = eval { $query_flags_for_accel->($accel eq 'kvm') };
            warn "warning: failed querying supported $accel flags: $@\n" if $@;
        }
    });

    return $flags_for;
}
3500
# Understood CPU flags are written to a file at 'pve-qemu' compile time
my $understood_cpu_flag_dir = "/usr/share/kvm";

# Reads the whitespace-separated flag list for the host architecture from
# that directory and returns it as an array reference. Dies if the file for
# the host architecture does not exist.
sub query_understood_cpu_flags {
    my $host_arch = get_host_arch();
    my $flag_file = "$understood_cpu_flag_dir/recognized-CPUID-flags-$host_arch";

    if (! -e $flag_file) {
        die "Cannot query understood QEMU CPU flags for architecture: $host_arch (file not found)\n";
    }

    my $content = file_get_contents($flag_file);

    # split on ' ' skips leading whitespace and drops trailing empty fields
    my @understood = split(' ', $content);

    return \@understood;
}
3516
# Since commit 277d33454f77ec1d1e0bc04e37621e4dd2424b67 in pve-qemu, smm is not off by default
# anymore. But smm=off seems to be required when using SeaBIOS and serial display.
#
# True if the config uses SeaBIOS (explicitly, or implicitly by not setting
# 'bios') and the vga type is a serial port or 'none'.
my sub should_disable_smm {
    my ($conf, $vga) = @_;

    my $bios = $conf->{bios};
    my $uses_seabios = !defined($bios) || $bios eq 'seabios';

    return $uses_seabios && $vga->{type} && $vga->{type} =~ m/^(serial\d+|none)$/;
}
3525
# Builds the two '-drive' option strings needed for OVMF: the read-only
# firmware code image and the EFI vars image.
#
# The vars drive points at the configured efidisk0 if there is one. Otherwise
# a warning is logged and the vars template is copied to /tmp/<vmid>-ovmf.fd,
# which is then used instead. $version_guard is called as a code reference
# with (4, 1, 2) to decide whether an explicit size option is added.
#
# Returns the list ($code_drive_str, $var_drive_str).
my sub print_ovmf_drive_commandlines {
    my ($conf, $storecfg, $vmid, $arch, $q35, $version_guard) = @_;

    my $efidisk = $conf->{efidisk0} ? parse_drive('efidisk0', $conf->{efidisk0}) : undef;

    my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch, $efidisk, $q35);

    my @var_opts = ('if=pflash', 'unit=1', 'id=drive-efidisk0');

    if (!$efidisk) {
        log_warn("no efidisk configured! Using temporary efivars disk.");
        my $tmp_path = "/tmp/$vmid-ovmf.fd";
        PVE::Tools::file_copy($ovmf_vars, $tmp_path, -s $ovmf_vars);
        push @var_opts, 'format=raw', "file=$tmp_path";
        push @var_opts, "size=" . (-s $ovmf_vars) if $version_guard->(4, 1, 2);
    } else {
        my $volid = $efidisk->{file};
        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);

        my $path = $volid;
        my $format = $efidisk->{format};
        if ($storeid) {
            $path = PVE::Storage::path($storecfg, $volid);
            if (!defined($format)) {
                my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
                $format = qemu_img_format($scfg, $volname);
            }
        } elsif (!defined($format)) {
            die "efidisk format must be specified\n";
        }

        # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
        if ($path =~ m/^rbd:/) {
            push @var_opts, 'cache=writeback';
            $path .= ':rbd_cache_policy=writeback'; # avoid write-around, we *need* to cache writes too
        }
        push @var_opts, "format=$format", "file=$path";

        push @var_opts, "size=" . (-s $ovmf_vars) if $format eq 'raw' && $version_guard->(4, 1, 2);
        push @var_opts, 'readonly=on' if drive_is_read_only($conf, $efidisk);
    }

    my $code_drive_str = "if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code";

    return ($code_drive_str, join(',', @var_opts));
}
3565
3566 sub config_to_command {
3567 my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu,
3568 $pbs_backing) = @_;
3569
3570 my ($globalFlags, $machineFlags, $rtcFlags) = ([], [], []);
3571 my $devices = [];
3572 my $bridges = {};
3573 my $ostype = $conf->{ostype};
3574 my $winversion = windows_version($ostype);
3575 my $kvm = $conf->{kvm};
3576 my $nodename = nodename();
3577
3578 my $arch = get_vm_arch($conf);
3579 my $kvm_binary = get_command_for_arch($arch);
3580 my $kvmver = kvm_user_version($kvm_binary);
3581
3582 if (!$kvmver || $kvmver !~ m/^(\d+)\.(\d+)/ || $1 < 3) {
3583 $kvmver //= "undefined";
3584 die "Detected old QEMU binary ('$kvmver', at least 3.0 is required)\n";
3585 }
3586
3587 my $add_pve_version = min_version($kvmver, 4, 1);
3588
3589 my $machine_type = get_vm_machine($conf, $forcemachine, $arch, $add_pve_version);
3590 my $machine_version = extract_version($machine_type, $kvmver);
3591 $kvm //= 1 if is_native($arch);
3592
3593 $machine_version =~ m/(\d+)\.(\d+)/;
3594 my ($machine_major, $machine_minor) = ($1, $2);
3595
3596 if ($kvmver =~ m/^\d+\.\d+\.(\d+)/ && $1 >= 90) {
3597 warn "warning: Installed QEMU version ($kvmver) is a release candidate, ignoring version checks\n";
3598 } elsif (!min_version($kvmver, $machine_major, $machine_minor)) {
3599 die "Installed QEMU version '$kvmver' is too old to run machine type '$machine_type',"
3600 ." please upgrade node '$nodename'\n"
3601 } elsif (!PVE::QemuServer::Machine::can_run_pve_machine_version($machine_version, $kvmver)) {
3602 my $max_pve_version = PVE::QemuServer::Machine::get_pve_version($machine_version);
3603 die "Installed qemu-server (max feature level for $machine_major.$machine_minor is"
3604 ." pve$max_pve_version) is too old to run machine type '$machine_type', please upgrade"
3605 ." node '$nodename'\n";
3606 }
3607
3608 # if a specific +pve version is required for a feature, use $version_guard
3609 # instead of min_version to allow machines to be run with the minimum
3610 # required version
3611 my $required_pve_version = 0;
3612 my $version_guard = sub {
3613 my ($major, $minor, $pve) = @_;
3614 return 0 if !min_version($machine_version, $major, $minor, $pve);
3615 my $max_pve = PVE::QemuServer::Machine::get_pve_version("$major.$minor");
3616 return 1 if min_version($machine_version, $major, $minor, $max_pve+1);
3617 $required_pve_version = $pve if $pve && $pve > $required_pve_version;
3618 return 1;
3619 };
3620
3621 if ($kvm && !defined kvm_version()) {
3622 die "KVM virtualisation configured, but not available. Either disable in VM configuration"
3623 ." or enable in BIOS.\n";
3624 }
3625
3626 my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
3627 my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
3628 my $use_old_bios_files = undef;
3629 ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);
3630
3631 my $cmd = [];
3632 if ($conf->{affinity}) {
3633 push @$cmd, '/usr/bin/taskset', '--cpu-list', '--all-tasks', $conf->{affinity};
3634 }
3635
3636 push @$cmd, $kvm_binary;
3637
3638 push @$cmd, '-id', $vmid;
3639
3640 my $vmname = $conf->{name} || "vm$vmid";
3641
3642 push @$cmd, '-name', "$vmname,debug-threads=on";
3643
3644 push @$cmd, '-no-shutdown';
3645
3646 my $use_virtio = 0;
3647
3648 my $qmpsocket = PVE::QemuServer::Helpers::qmp_socket($vmid);
3649 push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server=on,wait=off";
3650 push @$cmd, '-mon', "chardev=qmp,mode=control";
3651
3652 if (min_version($machine_version, 2, 12)) {
3653 push @$cmd, '-chardev', "socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5";
3654 push @$cmd, '-mon', "chardev=qmp-event,mode=control";
3655 }
3656
3657 push @$cmd, '-pidfile' , PVE::QemuServer::Helpers::pidfile_name($vmid);
3658
3659 push @$cmd, '-daemonize';
3660
3661 if ($conf->{smbios1}) {
3662 my $smbios_conf = parse_smbios1($conf->{smbios1});
3663 if ($smbios_conf->{base64}) {
3664 # Do not pass base64 flag to qemu
3665 delete $smbios_conf->{base64};
3666 my $smbios_string = "";
3667 foreach my $key (keys %$smbios_conf) {
3668 my $value;
3669 if ($key eq "uuid") {
3670 $value = $smbios_conf->{uuid}
3671 } else {
3672 $value = decode_base64($smbios_conf->{$key});
3673 }
3674 # qemu accepts any binary data, only commas need escaping by double comma
3675 $value =~ s/,/,,/g;
3676 $smbios_string .= "," . $key . "=" . $value if $value;
3677 }
3678 push @$cmd, '-smbios', "type=1" . $smbios_string;
3679 } else {
3680 push @$cmd, '-smbios', "type=1,$conf->{smbios1}";
3681 }
3682 }
3683
3684 if ($conf->{bios} && $conf->{bios} eq 'ovmf') {
3685 my ($code_drive_str, $var_drive_str) =
3686 print_ovmf_drive_commandlines($conf, $storecfg, $vmid, $arch, $q35, $version_guard);
3687 push $cmd->@*, '-drive', $code_drive_str;
3688 push $cmd->@*, '-drive', $var_drive_str;
3689 }
3690
3691 if ($q35) { # tell QEMU to load q35 config early
3692 # we use different pcie-port hardware for qemu >= 4.0 for passthrough
3693 if (min_version($machine_version, 4, 0)) {
3694 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35-4.0.cfg';
3695 } else {
3696 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35.cfg';
3697 }
3698 }
3699
3700 if (defined(my $fixups = qemu_created_version_fixups($conf, $forcemachine, $kvmver))) {
3701 push @$cmd, $fixups->@*;
3702 }
3703
3704 if ($conf->{vmgenid}) {
3705 push @$devices, '-device', 'vmgenid,guid='.$conf->{vmgenid};
3706 }
3707
3708 # add usb controllers
3709 my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers(
3710 $conf, $bridges, $arch, $machine_type, $usbdesc->{format}, $MAX_USB_DEVICES, $machine_version);
3711 push @$devices, @usbcontrollers if @usbcontrollers;
3712 my $vga = parse_vga($conf->{vga});
3713
3714 my $qxlnum = vga_conf_has_spice($conf->{vga});
3715 $vga->{type} = 'qxl' if $qxlnum;
3716
3717 if (!$vga->{type}) {
3718 if ($arch eq 'aarch64') {
3719 $vga->{type} = 'virtio';
3720 } elsif (min_version($machine_version, 2, 9)) {
3721 $vga->{type} = (!$winversion || $winversion >= 6) ? 'std' : 'cirrus';
3722 } else {
3723 $vga->{type} = ($winversion >= 6) ? 'std' : 'cirrus';
3724 }
3725 }
3726
3727 # enable absolute mouse coordinates (needed by vnc)
3728 my $tablet = $conf->{tablet};
3729 if (!defined($tablet)) {
3730 $tablet = $defaults->{tablet};
3731 $tablet = 0 if $qxlnum; # disable for spice because it is not needed
3732 $tablet = 0 if $vga->{type} =~ m/^serial\d+$/; # disable if we use serial terminal (no vga card)
3733 }
3734
3735 if ($tablet) {
3736 push @$devices, '-device', print_tabletdevice_full($conf, $arch) if $tablet;
3737 my $kbd = print_keyboarddevice_full($conf, $arch);
3738 push @$devices, '-device', $kbd if defined($kbd);
3739 }
3740
3741 my $bootorder = device_bootorder($conf);
3742
3743 # host pci device passthrough
3744 my ($kvm_off, $gpu_passthrough, $legacy_igd) = PVE::QemuServer::PCI::print_hostpci_devices(
3745 $vmid, $conf, $devices, $vga, $winversion, $q35, $bridges, $arch, $machine_type, $bootorder);
3746
3747 # usb devices
3748 my $usb_dev_features = {};
3749 $usb_dev_features->{spice_usb3} = 1 if min_version($machine_version, 4, 0);
3750
3751 my @usbdevices = PVE::QemuServer::USB::get_usb_devices(
3752 $conf, $usbdesc->{format}, $MAX_USB_DEVICES, $usb_dev_features, $bootorder, $machine_version);
3753 push @$devices, @usbdevices if @usbdevices;
3754
3755 # serial devices
3756 for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
3757 my $path = $conf->{"serial$i"} or next;
3758 if ($path eq 'socket') {
3759 my $socket = "/var/run/qemu-server/${vmid}.serial$i";
3760 push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server=on,wait=off";
3761 # On aarch64, serial0 is the UART device. QEMU only allows
3762 # connecting UART devices via the '-serial' command line, as
3763 # the device has a fixed slot on the hardware...
3764 if ($arch eq 'aarch64' && $i == 0) {
3765 push @$devices, '-serial', "chardev:serial$i";
3766 } else {
3767 push @$devices, '-device', "isa-serial,chardev=serial$i";
3768 }
3769 } else {
3770 die "no such serial device\n" if ! -c $path;
3771 push @$devices, '-chardev', "tty,id=serial$i,path=$path";
3772 push @$devices, '-device', "isa-serial,chardev=serial$i";
3773 }
3774 }
3775
3776 # parallel devices
3777 for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
3778 if (my $path = $conf->{"parallel$i"}) {
3779 die "no such parallel device\n" if ! -c $path;
3780 my $devtype = $path =~ m!^/dev/usb/lp! ? 'tty' : 'parport';
3781 push @$devices, '-chardev', "$devtype,id=parallel$i,path=$path";
3782 push @$devices, '-device', "isa-parallel,chardev=parallel$i";
3783 }
3784 }
3785
3786 if (min_version($machine_version, 4, 0) && (my $audio = conf_has_audio($conf))) {
3787 my $audiopciaddr = print_pci_addr("audio0", $bridges, $arch, $machine_type);
3788 my $audio_devs = audio_devs($audio, $audiopciaddr, $machine_version);
3789 push @$devices, @$audio_devs;
3790 }
3791
3792 add_tpm_device($vmid, $devices, $conf);
3793
3794 my $sockets = 1;
3795 $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused
3796 $sockets = $conf->{sockets} if $conf->{sockets};
3797
3798 my $cores = $conf->{cores} || 1;
3799
3800 my $maxcpus = $sockets * $cores;
3801
3802 my $vcpus = $conf->{vcpus} ? $conf->{vcpus} : $maxcpus;
3803
3804 my $allowed_vcpus = $cpuinfo->{cpus};
3805
3806 die "MAX $allowed_vcpus vcpus allowed per VM on this node\n" if ($allowed_vcpus < $maxcpus);
3807
3808 if ($hotplug_features->{cpu} && min_version($machine_version, 2, 7)) {
3809 push @$cmd, '-smp', "1,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3810 for (my $i = 2; $i <= $vcpus; $i++) {
3811 my $cpustr = print_cpu_device($conf,$i);
3812 push @$cmd, '-device', $cpustr;
3813 }
3814
3815 } else {
3816
3817 push @$cmd, '-smp', "$vcpus,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3818 }
3819 push @$cmd, '-nodefaults';
3820
3821 push @$cmd, '-boot', "menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg";
3822
3823 push @$cmd, '-no-acpi' if defined($conf->{acpi}) && $conf->{acpi} == 0;
3824
3825 push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;
3826
3827 if ($vga->{type} && $vga->{type} !~ m/^serial\d+$/ && $vga->{type} ne 'none'){
3828 push @$devices, '-device', print_vga_device(
3829 $conf, $vga, $arch, $machine_version, $machine_type, undef, $qxlnum, $bridges);
3830
3831 push @$cmd, '-display', 'egl-headless,gl=core' if $vga->{type} eq 'virtio-gl'; # VIRGL
3832
3833 my $socket = PVE::QemuServer::Helpers::vnc_socket($vmid);
3834 push @$cmd, '-vnc', "unix:$socket,password=on";
3835 } else {
3836 push @$cmd, '-vga', 'none' if $vga->{type} eq 'none';
3837 push @$cmd, '-nographic';
3838 }
3839
3840 # time drift fix
3841 my $tdf = defined($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};
3842 my $useLocaltime = $conf->{localtime};
3843
3844 if ($winversion >= 5) { # windows
3845 $useLocaltime = 1 if !defined($conf->{localtime});
3846
3847 # use time drift fix when acpi is enabled
3848 if (!(defined($conf->{acpi}) && $conf->{acpi} == 0)) {
3849 $tdf = 1 if !defined($conf->{tdf});
3850 }
3851 }
3852
3853 if ($winversion >= 6) {
3854 push @$globalFlags, 'kvm-pit.lost_tick_policy=discard';
3855 push @$cmd, '-no-hpet';
3856 }
3857
3858 push @$rtcFlags, 'driftfix=slew' if $tdf;
3859
3860 if ($conf->{startdate} && $conf->{startdate} ne 'now') {
3861 push @$rtcFlags, "base=$conf->{startdate}";
3862 } elsif ($useLocaltime) {
3863 push @$rtcFlags, 'base=localtime';
3864 }
3865
3866 if ($forcecpu) {
3867 push @$cmd, '-cpu', $forcecpu;
3868 } else {
3869 push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
3870 }
3871
3872 PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd);
3873
3874 push @$cmd, '-S' if $conf->{freeze};
3875
3876 push @$cmd, '-k', $conf->{keyboard} if defined($conf->{keyboard});
3877
3878 my $guest_agent = parse_guest_agent($conf);
3879
3880 if ($guest_agent->{enabled}) {
3881 my $qgasocket = PVE::QemuServer::Helpers::qmp_socket($vmid, 1);
3882 push @$devices, '-chardev', "socket,path=$qgasocket,server=on,wait=off,id=qga0";
3883
3884 if (!$guest_agent->{type} || $guest_agent->{type} eq 'virtio') {
3885 my $pciaddr = print_pci_addr("qga0", $bridges, $arch, $machine_type);
3886 push @$devices, '-device', "virtio-serial,id=qga0$pciaddr";
3887 push @$devices, '-device', 'virtserialport,chardev=qga0,name=org.qemu.guest_agent.0';
3888 } elsif ($guest_agent->{type} eq 'isa') {
3889 push @$devices, '-device', "isa-serial,chardev=qga0";
3890 }
3891 }
3892
3893 my $rng = $conf->{rng0} ? parse_rng($conf->{rng0}) : undef;
3894 if ($rng && $version_guard->(4, 1, 2)) {
3895 check_rng_source($rng->{source});
3896
3897 my $max_bytes = $rng->{max_bytes} // $rng_fmt->{max_bytes}->{default};
3898 my $period = $rng->{period} // $rng_fmt->{period}->{default};
3899 my $limiter_str = "";
3900 if ($max_bytes) {
3901 $limiter_str = ",max-bytes=$max_bytes,period=$period";
3902 }
3903
3904 my $rng_addr = print_pci_addr("rng0", $bridges, $arch, $machine_type);
3905 push @$devices, '-object', "rng-random,filename=$rng->{source},id=rng0";
3906 push @$devices, '-device', "virtio-rng-pci,rng=rng0$limiter_str$rng_addr";
3907 }
3908
3909 my $spice_port;
3910
3911 if ($qxlnum || $vga->{type} =~ /^virtio/) {
3912 if ($qxlnum > 1) {
3913 if ($winversion){
3914 for (my $i = 1; $i < $qxlnum; $i++){
3915 push @$devices, '-device', print_vga_device(
3916 $conf, $vga, $arch, $machine_version, $machine_type, $i, $qxlnum, $bridges);
3917 }
3918 } else {
3919 # assume other OS works like Linux
3920 my ($ram, $vram) = ("134217728", "67108864");
3921 if ($vga->{memory}) {
3922 $ram = PVE::Tools::convert_size($qxlnum*4*$vga->{memory}, 'mb' => 'b');
3923 $vram = PVE::Tools::convert_size($qxlnum*2*$vga->{memory}, 'mb' => 'b');
3924 }
3925 push @$cmd, '-global', "qxl-vga.ram_size=$ram";
3926 push @$cmd, '-global', "qxl-vga.vram_size=$vram";
3927 }
3928 }
3929
3930 my $pciaddr = print_pci_addr("spice", $bridges, $arch, $machine_type);
3931
3932 my $pfamily = PVE::Tools::get_host_address_family($nodename);
3933 my @nodeaddrs = PVE::Tools::getaddrinfo_all('localhost', family => $pfamily);
3934 die "failed to get an ip address of type $pfamily for 'localhost'\n" if !@nodeaddrs;
3935
3936 push @$devices, '-device', "virtio-serial,id=spice$pciaddr";
3937 push @$devices, '-chardev', "spicevmc,id=vdagent,name=vdagent";
3938 push @$devices, '-device', "virtserialport,chardev=vdagent,name=com.redhat.spice.0";
3939
3940 my $localhost = PVE::Network::addr_to_ip($nodeaddrs[0]->{addr});
3941 $spice_port = PVE::Tools::next_spice_port($pfamily, $localhost);
3942
3943 my $spice_enhancement_str = $conf->{spice_enhancements} // '';
3944 my $spice_enhancement = parse_property_string($spice_enhancements_fmt, $spice_enhancement_str);
3945 if ($spice_enhancement->{foldersharing}) {
3946 push @$devices, '-chardev', "spiceport,id=foldershare,name=org.spice-space.webdav.0";
3947 push @$devices, '-device', "virtserialport,chardev=foldershare,name=org.spice-space.webdav.0";
3948 }
3949
3950 my $spice_opts = "tls-port=${spice_port},addr=$localhost,tls-ciphers=HIGH,seamless-migration=on";
3951 $spice_opts .= ",streaming-video=$spice_enhancement->{videostreaming}"
3952 if $spice_enhancement->{videostreaming};
3953
3954 push @$devices, '-spice', "$spice_opts";
3955 }
3956
3957 # enable balloon by default, unless explicitly disabled
3958 if (!defined($conf->{balloon}) || $conf->{balloon}) {
3959 my $pciaddr = print_pci_addr("balloon0", $bridges, $arch, $machine_type);
3960 my $ballooncmd = "virtio-balloon-pci,id=balloon0$pciaddr";
3961 $ballooncmd .= ",free-page-reporting=on" if min_version($machine_version, 6, 2);
3962 push @$devices, '-device', $ballooncmd;
3963 }
3964
3965 if ($conf->{watchdog}) {
3966 my $wdopts = parse_watchdog($conf->{watchdog});
3967 my $pciaddr = print_pci_addr("watchdog", $bridges, $arch, $machine_type);
3968 my $watchdog = $wdopts->{model} || 'i6300esb';
3969 push @$devices, '-device', "$watchdog$pciaddr";
3970 push @$devices, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
3971 }
3972
3973 my $vollist = [];
3974 my $scsicontroller = {};
3975 my $ahcicontroller = {};
3976 my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : $defaults->{scsihw};
3977
3978 # Add iscsi initiator name if available
3979 if (my $initiator = get_initiator_name()) {
3980 push @$devices, '-iscsi', "initiator-name=$initiator";
3981 }
3982
3983 PVE::QemuConfig->foreach_volume($conf, sub {
3984 my ($ds, $drive) = @_;
3985
3986 if (PVE::Storage::parse_volume_id($drive->{file}, 1)) {
3987 check_volume_storage_type($storecfg, $drive->{file});
3988 push @$vollist, $drive->{file};
3989 }
3990
3991 # ignore efidisk here, already added in bios/fw handling code above
3992 return if $drive->{interface} eq 'efidisk';
3993 # similar for TPM
3994 return if $drive->{interface} eq 'tpmstate';
3995
3996 $use_virtio = 1 if $ds =~ m/^virtio/;
3997
3998 $drive->{bootindex} = $bootorder->{$ds} if $bootorder->{$ds};
3999
4000 if ($drive->{interface} eq 'virtio'){
4001 push @$cmd, '-object', "iothread,id=iothread-$ds" if $drive->{iothread};
4002 }
4003
4004 if ($drive->{interface} eq 'scsi') {
4005
4006 my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
4007
4008 die "scsi$drive->{index}: machine version 4.1~pve2 or higher is required to use more than 14 SCSI disks\n"
4009 if $drive->{index} > 13 && !&$version_guard(4, 1, 2);
4010
4011 my $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges, $arch, $machine_type);
4012 my $scsihw_type = $scsihw =~ m/^virtio-scsi-single/ ? "virtio-scsi-pci" : $scsihw;
4013
4014 my $iothread = '';
4015 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{iothread}){
4016 $iothread .= ",iothread=iothread-$controller_prefix$controller";
4017 push @$cmd, '-object', "iothread,id=iothread-$controller_prefix$controller";
4018 } elsif ($drive->{iothread}) {
4019 log_warn(
4020 "iothread is only valid with virtio disk or virtio-scsi-single controller, ignoring\n"
4021 );
4022 }
4023
4024 my $queues = '';
4025 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{queues}){
4026 $queues = ",num_queues=$drive->{queues}";
4027 }
4028
4029 push @$devices, '-device', "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues"
4030 if !$scsicontroller->{$controller};
4031 $scsicontroller->{$controller}=1;
4032 }
4033
4034 if ($drive->{interface} eq 'sata') {
4035 my $controller = int($drive->{index} / $PVE::QemuServer::Drive::MAX_SATA_DISKS);
4036 my $pciaddr = print_pci_addr("ahci$controller", $bridges, $arch, $machine_type);
4037 push @$devices, '-device', "ahci,id=ahci$controller,multifunction=on$pciaddr"
4038 if !$ahcicontroller->{$controller};
4039 $ahcicontroller->{$controller}=1;
4040 }
4041
4042 my $pbs_conf = $pbs_backing->{$ds};
4043 my $pbs_name = undef;
4044 if ($pbs_conf) {
4045 $pbs_name = "drive-$ds-pbs";
4046 push @$devices, '-blockdev', print_pbs_blockdev($pbs_conf, $pbs_name);
4047 }
4048
4049 my $drive_cmd = print_drive_commandline_full(
4050 $storecfg, $vmid, $drive, $pbs_name, min_version($kvmver, 6, 0));
4051
4052 # extra protection for templates, but SATA and IDE don't support it..
4053 $drive_cmd .= ',readonly=on' if drive_is_read_only($conf, $drive);
4054
4055 push @$devices, '-drive',$drive_cmd;
4056 push @$devices, '-device', print_drivedevice_full(
4057 $storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type);
4058 });
4059
4060 for (my $i = 0; $i < $MAX_NETS; $i++) {
4061 my $netname = "net$i";
4062
4063 next if !$conf->{$netname};
4064 my $d = parse_net($conf->{$netname});
4065 next if !$d;
4066 # save the MAC addr here (could be auto-gen. in some odd setups) for FDB registering later?
4067
4068 $use_virtio = 1 if $d->{model} eq 'virtio';
4069
4070 $d->{bootindex} = $bootorder->{$netname} if $bootorder->{$netname};
4071
4072 my $netdevfull = print_netdev_full($vmid, $conf, $arch, $d, $netname);
4073 push @$devices, '-netdev', $netdevfull;
4074
4075 my $netdevicefull = print_netdevice_full(
4076 $vmid, $conf, $d, $netname, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version);
4077
4078 push @$devices, '-device', $netdevicefull;
4079 }
4080
4081 if ($conf->{ivshmem}) {
4082 my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
4083
4084 my $bus;
4085 if ($q35) {
4086 $bus = print_pcie_addr("ivshmem");
4087 } else {
4088 $bus = print_pci_addr("ivshmem", $bridges, $arch, $machine_type);
4089 }
4090
4091 my $ivshmem_name = $ivshmem->{name} // $vmid;
4092 my $path = '/dev/shm/pve-shm-' . $ivshmem_name;
4093
4094 push @$devices, '-device', "ivshmem-plain,memdev=ivshmem$bus,";
4095 push @$devices, '-object', "memory-backend-file,id=ivshmem,share=on,mem-path=$path"
4096 .",size=$ivshmem->{size}M";
4097 }
4098
4099 # pci.4 is nested in pci.1
4100 $bridges->{1} = 1 if $bridges->{4};
4101
4102 if (!$q35) { # add pci bridges
4103 if (min_version($machine_version, 2, 3)) {
4104 $bridges->{1} = 1;
4105 $bridges->{2} = 1;
4106 }
4107 $bridges->{3} = 1 if $scsihw =~ m/^virtio-scsi-single/;
4108 }
4109
4110 for my $k (sort {$b cmp $a} keys %$bridges) {
4111 next if $q35 && $k < 4; # q35.cfg already includes bridges up to 3
4112
4113 my $k_name = $k;
4114 if ($k == 2 && $legacy_igd) {
4115 $k_name = "$k-igd";
4116 }
4117 my $pciaddr = print_pci_addr("pci.$k_name", undef, $arch, $machine_type);
4118 my $devstr = "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr";
4119
4120 if ($q35) { # add after -readconfig pve-q35.cfg
4121 splice @$devices, 2, 0, '-device', $devstr;
4122 } else {
4123 unshift @$devices, '-device', $devstr if $k > 0;
4124 }
4125 }
4126
4127 if (!$kvm) {
4128 push @$machineFlags, 'accel=tcg';
4129 }
4130
4131 push @$machineFlags, 'smm=off' if should_disable_smm($conf, $vga);
4132
4133 my $machine_type_min = $machine_type;
4134 if ($add_pve_version) {
4135 $machine_type_min =~ s/\+pve\d+$//;
4136 $machine_type_min .= "+pve$required_pve_version";
4137 }
4138 push @$machineFlags, "type=${machine_type_min}";
4139
4140 push @$cmd, @$devices;
4141 push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
4142 push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
4143 push @$cmd, '-global', join(',', @$globalFlags) if scalar(@$globalFlags);
4144
4145 if (my $vmstate = $conf->{vmstate}) {
4146 my $statepath = PVE::Storage::path($storecfg, $vmstate);
4147 push @$vollist, $vmstate;
4148 push @$cmd, '-loadstate', $statepath;
4149 print "activating and using '$vmstate' as vmstate\n";
4150 }
4151
4152 if (PVE::QemuConfig->is_template($conf)) {
4153 # needed to workaround base volumes being read-only
4154 push @$cmd, '-snapshot';
4155 }
4156
4157 # add custom args
4158 if ($conf->{args}) {
4159 my $aa = PVE::Tools::split_args($conf->{args});
4160 push @$cmd, @$aa;
4161 }
4162
4163 return wantarray ? ($cmd, $vollist, $spice_port) : $cmd;
4164 }
4165
# Validate the source path of a VirtIO RNG device before it is handed to QEMU.
#
# Dies if $source does not exist, or if $source is '/dev/hwrng' while the first line of the
# hw_random 'rng_current' sysfs file reads 'none'.
sub check_rng_source {
    my ($source) = @_;

    # mostly relevant for /dev/hwrng, but doesn't hurt to check others too
    if (!-e $source) {
        die "cannot create VirtIO RNG device: source file '$source' doesn't exist\n";
    }

    my $rng_current = '/sys/devices/virtual/misc/hw_random/rng_current';
    my $is_hwrng = $source eq '/dev/hwrng';

    if ($is_hwrng && file_read_firstline($rng_current) eq 'none') {
        # Needs to abort, otherwise QEMU crashes on first rng access. Note that rng_current cannot
        # be changed to 'none' manually, so once the VM is past this point, it's no longer an issue.
        die "Cannot start VM with passed-through RNG device: '/dev/hwrng' exists, but "
            ."'$rng_current' is set to 'none'. Ensure that a compatible hardware-RNG is attached "
            ."to the host.\n";
    }
}
4182
# Ask the running VM (QMP 'query-spice') for its SPICE port.
#
# Returns the 'tls-port' if set, otherwise the plain 'port'; dies if neither is set.
sub spice_port {
    my ($vmid) = @_;

    my $spice_info = mon_cmd($vmid, 'query-spice');

    for my $key ('tls-port', 'port') {
        return $spice_info->{$key} if $spice_info->{$key};
    }

    die "no spice port\n";
}
4190
# Collect the IDs of devices currently present in the running VM $vmid.
#
# Returns a hash reference keyed by device ID, built from four monitor queries:
#   - 'query-pci':   every PCI device with a 'qdev_id' (bridges are walked recursively; a bridge's
#                    value additionally has the number of devices directly behind it added)
#   - 'query-block': block devices named 'drive-<id>' are recorded as '<id>'
#   - 'query-mice':  'tablet' is set if a 'QEMU HID Tablet' is listed
#   - 'qom-list':    'usbN' / 'usbredirdevN' entries below /machine/peripheral
sub vm_devices_list {
    my ($vmid) = @_;

    my $pci_buses = mon_cmd($vmid, 'query-pci');
    my $devices = {};

    # work queue of PCI devices; devices behind bridges are appended while walking
    my @pending = map { @{$_->{devices}} } @$pci_buses;
    while (@pending) {
        my $pci_dev = shift @pending;
        my $qdev_id = $pci_dev->{'qdev_id'};
        $devices->{$qdev_id} = 1 if $qdev_id;

        my $bridge = $pci_dev->{'pci_bridge'};
        next if !$bridge || !$bridge->{devices};

        my $behind_bridge = $bridge->{devices};
        $devices->{$qdev_id} += scalar(@$behind_bridge);
        push @pending, @$behind_bridge;
    }

    my $blocks = mon_cmd($vmid, 'query-block');
    for my $block (@$blocks) {
        next if $block->{device} !~ m/^drive-(\S+)/;
        $devices->{$1} = 1;
    }

    my $mice = mon_cmd($vmid, 'query-mice');
    $devices->{tablet} = 1 if first { $_->{name} eq 'QEMU HID Tablet' } @$mice;

    # for usb devices there is no query-usb
    # but we can iterate over the entries in
    # qom-list path=/machine/peripheral
    my $peripherals = mon_cmd($vmid, 'qom-list', path => '/machine/peripheral');
    my @usb_names = grep { m/^usb(?:redirdev)?\d+$/ } map { $_->{name} } @$peripherals;
    $devices->{$_} = 1 for @usb_names;

    return $devices;
}
4240
# Hot-plug the device $deviceid into the running VM $vmid.
#
# Parameters:
#   $storecfg     - storage configuration, passed on to the drive helpers
#   $conf         - VM configuration hash
#   $vmid         - ID of the VM
#   $deviceid     - ID of the device to plug: 'tablet', 'keyboard', 'usbredirdevN', 'usbN',
#                   'virtioN', 'virtioscsiN', 'scsihwN', 'scsiN', 'netN' or (non-q35 only) 'pci.N'
#   $device       - parsed device configuration; only read for USB, disk, SCSI controller and
#                   network devices
#   $arch         - passed on to the address/device string helpers
#   $machine_type - passed on to the address/device string helpers
#
# Returns 1 if the device is already present or was plugged successfully, and nothing if the
# network backend could not be added. Dies for unknown device IDs and on plug errors.
#
# For disks and network devices the backend (drive / netdev) is added first. If adding or
# verifying the frontend device fails afterwards, the backend is removed again before the original
# error is re-thrown, so that no stale backend is left behind.
sub vm_deviceplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    my $devices_list = vm_devices_list($vmid);
    return 1 if defined($devices_list->{$deviceid});

    # add PCI bridge if we need it for the device
    qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid, $arch, $machine_type);

    if ($deviceid eq 'tablet') {
        qemu_deviceadd($vmid, print_tabletdevice_full($conf, $arch));
    } elsif ($deviceid eq 'keyboard') {
        qemu_deviceadd($vmid, print_keyboarddevice_full($conf, $arch));
    } elsif ($deviceid =~ m/^usbredirdev(\d+)$/) {
        my $id = $1;
        qemu_spice_usbredir_chardev_add($vmid, "usbredirchardev$id");
        qemu_deviceadd($vmid, PVE::QemuServer::USB::print_spice_usbdevice($id, "xhci", $id + 1));
    } elsif ($deviceid =~ m/^usb(\d+)$/) {
        qemu_deviceadd($vmid, PVE::QemuServer::USB::print_usbdevice_full($conf, $deviceid, $device, {}, $1 + 1));
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        qemu_iothread_add($vmid, $deviceid, $device);

        qemu_driveadd($storecfg, $vmid, $device);
        my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);

        # device_add is inside the eval so that the drive is removed again if it fails
        eval {
            qemu_deviceadd($vmid, $devicefull);
            qemu_deviceaddverify($vmid, $deviceid);
        };
        if (my $err = $@) {
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : "lsi";
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $scsihw_type = $scsihw eq 'virtio-scsi-single' ? "virtio-scsi-pci" : $scsihw;

        my $devicefull = "$scsihw_type,id=$deviceid$pciaddr";

        # iothread and num_queues are only applied to 'virtioscsiN' controllers
        if ($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{iothread}) {
            qemu_iothread_add($vmid, $deviceid, $device);
            $devicefull .= ",iothread=iothread-$deviceid";
        }

        if ($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{queues}) {
            $devicefull .= ",num_queues=$device->{queues}";
        }

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        qemu_findorcreatescsihw($storecfg, $conf, $vmid, $device, $arch, $machine_type);
        qemu_driveadd($storecfg, $vmid, $device);

        my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);
        eval { qemu_deviceadd($vmid, $devicefull); };
        if (my $err = $@) {
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        return if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid);

        # note: intentionally shadows the $machine_type parameter for this branch
        my $machine_type = PVE::QemuServer::Machine::qemu_machine_pxe($vmid, $conf);
        my $machine_version = PVE::QemuServer::Machine::extract_version($machine_type);
        my $use_old_bios_files = undef;
        ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);

        my $netdevicefull = print_netdevice_full(
            $vmid, $conf, $device, $deviceid, undef, $use_old_bios_files, $arch, $machine_type, $machine_version);

        # device_add is inside the eval so that the netdev is removed again if it fails
        eval {
            qemu_deviceadd($vmid, $netdevicefull);
            qemu_deviceaddverify($vmid, $deviceid);
            qemu_set_link_status($vmid, $deviceid, !$device->{link_down});
        };
        if (my $err = $@) {
            eval { qemu_netdevdel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif (!$q35 && $deviceid =~ m/^(pci\.)(\d+)$/) {
        my $bridgeid = $2;
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $devicefull = "pci-bridge,id=pci.$bridgeid,chassis_nr=$bridgeid$pciaddr";

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);
    } else {
        die "can't hotplug device '$deviceid'\n";
    }

    return 1;
}
4337
# fixme: this should raise exceptions on error!
#
# Hot-unplug the device $deviceid from the running VM $vmid.
#
# Returns 1 if the device is not present (nothing to do) or was removed. Dies if the device is one
# of the boot disks reported for $conf, or if the device ID is of an unknown kind.
sub vm_deviceunplug {
    my ($vmid, $conf, $deviceid) = @_;

    my $present_devices = vm_devices_list($vmid);
    return 1 if !defined($present_devices->{$deviceid});

    my $bootdisks = PVE::QemuServer::Drive::get_bootdisks($conf);
    die "can't unplug bootdisk '$deviceid'\n" if grep {$_ eq $deviceid} @$bootdisks;

    # request removal of the device and wait until it is really gone
    my $delete_and_verify = sub {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
    };

    my %unverified_removal = map { $_ => 1 } qw(tablet keyboard xhci);

    if ($unverified_removal{$deviceid}) {
        qemu_devicedel($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(?:usbredirdev|usb|virtioscsi|scsihw)\d+$/) {
        $delete_and_verify->();
    } elsif ($deviceid =~ m/^virtio\d+$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        $delete_and_verify->();
        qemu_drivedel($vmid, $deviceid);
        qemu_iothread_del($vmid, $deviceid, $drive);
    } elsif ($deviceid =~ m/^scsi\d+$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        $delete_and_verify->();
        qemu_drivedel($vmid, $deviceid);
        qemu_deletescsihw($conf, $vmid, $deviceid);

        my $single_controller = $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
        qemu_iothread_del($vmid, "virtioscsi$drive->{index}", $drive) if $single_controller;
    } elsif ($deviceid =~ m/^net\d+$/) {
        $delete_and_verify->();
        qemu_netdevdel($vmid, $deviceid);
    } else {
        die "can't unplug device '$deviceid'\n";
    }

    return 1;
}
4386
# Add a 'spicevmc' character device of type 'usbredir' with ID $id to the running VM $vmid
# (QMP 'chardev-add').
sub qemu_spice_usbredir_chardev_add {
    my ($vmid, $id) = @_;

    my $backend = {
        type => 'spicevmc',
        data => { type => "usbredir" },
    };

    mon_cmd($vmid, "chardev-add", id => $id, backend => $backend);
}
4400
# Add a device to the running VM $vmid via QMP 'device_add'.
#
# $devicefull is a QEMU '-device' style string ("<driver>,key=value,..."). It is split on '=' and
# ',' into the option hash passed to the monitor, with the leading driver name stored under
# 'driver'.
sub qemu_deviceadd {
    my ($vmid, $devicefull) = @_;

    my %options = split(/[=,]/, "driver=" . $devicefull);

    mon_cmd($vmid, "device_add", %options);
}
4409
# Request removal of device $deviceid from the running VM $vmid via QMP 'device_del' and return
# the monitor's answer.
sub qemu_devicedel {
    my ($vmid, $deviceid) = @_;

    my $result = mon_cmd($vmid, "device_del", id => $deviceid);

    return $result;
}
4415
# Create the iothread object "iothread-$deviceid" in the running VM if $device has 'iothread' set
# and no such object is listed yet.
sub qemu_iothread_add {
    my ($vmid, $deviceid, $device) = @_;

    return if !$device->{iothread};

    my $iothread_id = "iothread-$deviceid";
    my $existing = vm_iothreads_list($vmid);

    qemu_objectadd($vmid, $iothread_id, "iothread") if !$existing->{$iothread_id};
}
4424
# Remove the iothread object "iothread-$deviceid" from the running VM if $device has 'iothread'
# set and the object is currently listed.
sub qemu_iothread_del {
    my ($vmid, $deviceid, $device) = @_;

    return if !$device->{iothread};

    my $iothread_id = "iothread-$deviceid";
    my $existing = vm_iothreads_list($vmid);

    qemu_objectdel($vmid, $iothread_id) if $existing->{$iothread_id};
}
4433
# Create a QOM object of type $qomtype with ID $objectid in the running VM (QMP 'object-add').
# Always returns 1; errors are left to mon_cmd.
sub qemu_objectadd {
    my ($vmid, $objectid, $qomtype) = @_;

    my %args = (id => $objectid, "qom-type" => $qomtype);
    mon_cmd($vmid, "object-add", %args);

    return 1;
}
4441
# Delete the QOM object $objectid from the running VM (QMP 'object-del').
# Always returns 1; errors are left to mon_cmd.
sub qemu_objectdel {
    my ($vmid, $objectid) = @_;

    my %args = (id => $objectid);
    mon_cmd($vmid, "object-del", %args);

    return 1;
}
4449
# Add the drive backend for $device to the running VM via the HMP command 'drive_add'.
#
# io_uring is passed as enabled to the drive command line builder only if the running QEMU is at
# least version 6.0. Returns 1 on success, dies with the monitor output otherwise.
sub qemu_driveadd {
    my ($storecfg, $vmid, $device) = @_;

    my $running_version = get_running_qemu_version($vmid);
    my $use_io_uring = min_version($running_version, 6, 0);

    my $drive_opts = print_drive_commandline_full($storecfg, $vmid, $device, undef, $use_io_uring);
    # double every backslash, as the string is embedded in a quoted HMP argument
    $drive_opts =~ s/\\/\\\\/g;

    my $response = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_add auto \"$drive_opts\"");

    # If the command succeeds qemu prints: "OK"
    die "adding drive failed: $response\n" if $response !~ m/OK/s;

    return 1;
}
4464
# Remove the drive backend "drive-$deviceid" from the running VM via the HMP command 'drive_del'.
#
# Returns 1 if the monitor prints nothing (besides leading whitespace) or reports that the device
# was not found; dies with the monitor output otherwise.
sub qemu_drivedel {
    my ($vmid, $deviceid) = @_;

    my $response = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_del drive-$deviceid");
    $response =~ s/^\s+//;

    # NB: device not found errors mean the drive was auto-deleted and we ignore the error
    my $already_gone = $response =~ m/Device \'.*?\' not found/s;

    if ($response eq "" || $already_gone) {
        return 1;
    }

    die "deleting drive $deviceid failed : $response\n";
}
4478
# Wait until the hot-plugged device $deviceid shows up in the VM's device list.
#
# The list is polled up to six times with a one second pause after each unsuccessful check.
# Returns 1 once the device is present, dies if it never appears.
sub qemu_deviceaddverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $present_devices = vm_devices_list($vmid);
        if (defined($present_devices->{$deviceid})) {
            return 1;
        }
        sleep 1;
    }

    die "error on hotplug device '$deviceid'\n";
}
4490
4491
# Wait until the unplugged device $deviceid has vanished from the VM's device list.
#
# The list is polled up to six times with a one second pause after each unsuccessful check.
# Returns 1 once the device is gone, dies if it is still present afterwards.
sub qemu_devicedelverify {
    my ($vmid, $deviceid) = @_;

    # need to verify that the device is correctly removed as device_del
    # is async and empty return is not reliable

    for my $attempt (0 .. 5) {
        my $present_devices = vm_devices_list($vmid);
        if (!defined($present_devices->{$deviceid})) {
            return 1;
        }
        sleep 1;
    }

    die "error on hot-unplugging device '$deviceid'\n";
}
4506
# Make sure the SCSI controller that the drive $device belongs to exists in the running VM,
# hot-plugging it if it is not in the device list yet. Always returns 1.
sub qemu_findorcreatescsihw {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my (undef, $controller, $controller_prefix) = scsihw_infos($conf, $device);
    my $controller_id = "$controller_prefix$controller";

    my $present_devices = vm_devices_list($vmid);
    return 1 if defined($present_devices->{$controller_id});

    vm_deviceplug($storecfg, $conf, $vmid, $controller_id, $device, $arch, $machine_type);

    return 1;
}
4521
# Unplug the SCSI controller of the drive configured as $opt, unless another SCSI drive that is
# still present satisfies the index check below. Always returns 1.
#
# With 'virtio-scsi-single' every drive has its own 'virtioscsi<index>' controller, which is
# unplugged unconditionally.
sub qemu_deletescsihw {
    my ($conf, $vmid, $opt) = @_;

    my $device = parse_drive($opt, $conf->{$opt});

    my $single_controller = $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
    if ($single_controller) {
        vm_deviceunplug($vmid, $conf, "virtioscsi$device->{index}");
        return 1;
    }

    my ($maxdev, $controller) = scsihw_infos($conf, $device);

    my $present_devices = vm_devices_list($vmid);
    for my $name (keys %$present_devices) {
        next if !is_valid_drivename($name);

        my $drive = parse_drive($name, $conf->{$name});
        # keep the controller if a present SCSI drive has an index below this limit
        return 1
            if $drive->{interface} eq 'scsi' && $drive->{index} < (($maxdev-1)*($controller+1));
    }

    vm_deviceunplug($vmid, $conf, "scsihw$controller");

    return 1;
}
4550
# Hot-plug the PCI bridge needed by the device ID $device, if one is needed and not present yet.
#
# print_pci_addr() is called only to have it record the bridge used by $device in the (initially
# empty) $bridges hash; its return value is not used here. Nothing is plugged if no bridge was
# recorded or the recorded bridge ID is below 1. Always returns 1.
sub qemu_add_pci_bridge {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my $bridges = {};

    my $bridgeid;

    print_pci_addr($device, $bridges, $arch, $machine_type);

    while (my ($k, $v) = each %$bridges) {
        $bridgeid = $k;
    }
    return 1 if !defined($bridgeid) || $bridgeid < 1;

    my $bridge = "pci.$bridgeid";
    my $devices_list = vm_devices_list($vmid);

    if (!defined($devices_list->{$bridge})) {
        # vm_deviceplug() takes ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type);
        # a bridge has no parsed device config, so pass undef to keep $arch and $machine_type in
        # their proper positions
        vm_deviceplug($storecfg, $conf, $vmid, $bridge, undef, $arch, $machine_type);
    }

    return 1;
}
4574
# Set the link state of network device $device in the running VM (QMP 'set_link'): up if $up is
# true, down otherwise.
sub qemu_set_link_status {
    my ($vmid, $device, $up) = @_;

    my $link_state = $up ? JSON::true : JSON::false;

    mon_cmd($vmid, "set_link", name => $device, up => $link_state);
}
4581
# Add the network backend for $deviceid to the running VM via QMP 'netdev_add'.
#
# The netdev string from print_netdev_full() is split on '=' and ',' into QMP options; 'vhost' is
# converted to a JSON boolean and 'queues' to a number, when present. Always returns 1.
sub qemu_netdevadd {
    my ($vmid, $conf, $arch, $device, $deviceid) = @_;

    my $netdev = print_netdev_full($vmid, $conf, $arch, $device, $deviceid, 1);
    my %options = split(/[=,]/, $netdev);

    if (defined($options{vhost})) {
        $options{vhost} = JSON::boolean(PVE::JSONSchema::parse_boolean($options{vhost}));
    }

    # force numeric context so the value is not sent as a string
    $options{queues} += 0 if defined($options{queues});

    mon_cmd($vmid, "netdev_add", %options);
    return 1;
}
4599
# Remove the network backend with ID $deviceid from the running VM (QMP 'netdev_del').
sub qemu_netdevdel {
    my ($vmid, $deviceid) = @_;

    my %args = (id => $deviceid);

    mon_cmd($vmid, "netdev_del", %args);
}
4605
# Replace the USB device $deviceid of the running VM with the one described by $device.
#
# Does nothing if $device is false. Otherwise the old device is unplugged, an 'xhci' controller is
# added if none is in the device list, and the new device is plugged.
sub qemu_usb_hotplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    return if !$device;

    # remove the old one first
    vm_deviceunplug($vmid, $conf, $deviceid);

    # check if xhci controller is necessary and available
    my $present_devices = vm_devices_list($vmid);
    my $have_xhci = $present_devices->{xhci};

    if (!$have_xhci) {
        my $pciaddr = print_pci_addr("xhci", undef, $arch, $machine_type);
        qemu_deviceadd($vmid, PVE::QemuServer::USB::print_qemu_xhci_controller($pciaddr));
    }

    # print_usbdevice_full expects the parsed device
    my $parsed = parse_usb_device($device->{host});
    $parsed->{usb3} = $device->{usb3};

    # add the new one
    vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $parsed, $arch, $machine_type);
}
4629
# Change the number of vCPUs of the running VM $vmid to $vcpus by hot-(un)plugging CPUs.
#
# A false $vcpus selects the maximum (sockets * cores). On machine versions >= 2.7 CPUs are
# (un)plugged one at a time as 'cpu<N>' devices, and $conf->{vcpus} is updated and written back
# after each successful step. Older machine versions only support plugging, via 'cpu-add'.
#
# Dies if $vcpus exceeds the maximum, if unplugging is requested on a machine version below 2.7,
# or if the number of running vCPUs does not match the configuration before plugging. Raises a
# parameter exception for 'vcpus' if a CPU does not appear or disappear in time.
sub qemu_cpu_hotplug {
    my ($vmid, $conf, $vcpus) = @_;

    my $machine_type = PVE::QemuServer::Machine::get_current_qemu_machine($vmid);

    # 'smp' is the old style option (no longer used); 'sockets' takes precedence over it
    my $sockets = $conf->{sockets} || $conf->{smp} || 1;
    my $cores = $conf->{cores} || 1;
    my $maxcpus = $sockets * $cores;

    $vcpus ||= $maxcpus;

    die "you can't add more vcpus than maxcpus\n"
        if $vcpus > $maxcpus;

    my $currentvcpus = $conf->{vcpus} || $maxcpus;

    if ($vcpus < $currentvcpus) {
        die "cpu hot-unplugging requires qemu version 2.7 or higher\n"
            if !PVE::QemuServer::Machine::machine_version($machine_type, 2, 7);

        # unplug from the highest CPU downwards
        for my $cpu_nr (reverse($vcpus + 1 .. $currentvcpus)) {
            qemu_devicedel($vmid, "cpu$cpu_nr");

            my $attempts = 0;
            my $running_cpus = undef;
            while (1) {
                $running_cpus = mon_cmd($vmid, "query-cpus-fast");
                last if scalar(@{$running_cpus}) == $cpu_nr - 1;
                raise_param_exc({ vcpus => "error unplugging cpu$cpu_nr" }) if $attempts > 5;
                $attempts++;
                sleep 1;
            }

            # update conf after each successful cpu unplug
            $conf->{vcpus} = scalar(@{$running_cpus});
            PVE::QemuConfig->write_config($vmid, $conf);
        }

        return;
    }

    my $currentrunningvcpus = mon_cmd($vmid, "query-cpus-fast");
    die "vcpus in running vm does not match its configuration\n"
        if scalar(@{$currentrunningvcpus}) != $currentvcpus;

    if (PVE::QemuServer::Machine::machine_version($machine_type, 2, 7)) {
        for my $cpu_nr ($currentvcpus + 1 .. $vcpus) {
            my $cpustr = print_cpu_device($conf, $cpu_nr);
            qemu_deviceadd($vmid, $cpustr);

            my $attempts = 0;
            my $running_cpus = undef;
            while (1) {
                $running_cpus = mon_cmd($vmid, "query-cpus-fast");
                last if scalar(@{$running_cpus}) == $cpu_nr;
                raise_param_exc({ vcpus => "error hotplugging cpu$cpu_nr" }) if $attempts > 10;
                sleep 1;
                $attempts++;
            }

            # update conf after each successful cpu hotplug
            $conf->{vcpus} = scalar(@{$running_cpus});
            PVE::QemuConfig->write_config($vmid, $conf);
        }
    } else {
        # legacy interface, addressed by zero-based CPU index
        for my $cpu_index ($currentvcpus .. $vcpus - 1) {
            mon_cmd($vmid, "cpu-add", id => int($cpu_index));
        }
    }
}
4704
# Apply I/O throttling limits to block device $deviceid of VM $vmid (QMP 'block_set_io_throttle').
#
# After $vmid and $deviceid, the limits are expected positionally in this order:
#   bps, bps_rd, bps_wr, iops, iops_rd, iops_wr,
#   bps_max, bps_rd_max, bps_wr_max, iops_max, iops_rd_max, iops_wr_max,
#   bps_max_length, bps_rd_max_length, bps_wr_max_length,
#   iops_max_length, iops_rd_max_length, iops_wr_max_length
# Each value is converted with int() before it is sent. Does nothing if the VM is not running.
sub qemu_block_set_io_throttle {
    my ($vmid, $deviceid, @limits) = @_;

    return if !check_running($vmid);

    # QMP argument names, in the same order as the positional limits
    my @limit_names = qw(
        bps bps_rd bps_wr iops iops_rd iops_wr
        bps_max bps_rd_max bps_wr_max iops_max iops_rd_max iops_wr_max
        bps_max_length bps_rd_max_length bps_wr_max_length
        iops_max_length iops_rd_max_length iops_wr_max_length
    );

    my @throttle_args = map { ($limit_names[$_] => int($limits[$_])) } 0 .. $#limit_names;

    mon_cmd($vmid, "block_set_io_throttle", device => $deviceid, @throttle_args);
}
4736
# Resize volume $volid to $size through the storage layer and, if the VM is running, tell QEMU
# about it (QMP 'block_resize' on $deviceid).
#
# If the storage layer's volume_resize() returns a false value, a size of 0 is sent to QEMU.
# The size sent to QEMU is rounded up to the next multiple of 1024.
sub qemu_block_resize {
    my ($vmid, $deviceid, $storecfg, $volid, $size) = @_;

    my $running = check_running($vmid);

    my $resize_result = PVE::Storage::volume_resize($storecfg, $volid, $size, $running);
    $size = 0 if !$resize_result;

    return if !$running;

    # round up to a multiple of 1024
    my $remainder = $size % 1024;
    $size += 1024 - $remainder if $remainder;

    my %resize_args = (
        device => $deviceid,
        size => int($size),
        timeout => 60,
    );

    mon_cmd($vmid, "block_resize", %resize_args);
}
4757
# Create snapshot $snap of volume $volid.
#
# If the VM is running and do_snapshots_with_qemu() says so, QEMU takes the snapshot
# ('blockdev-snapshot-internal-sync' on $deviceid); otherwise the storage layer does.
sub qemu_volume_snapshot {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $via_qemu = check_running($vmid) && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    if (!$via_qemu) {
        PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
    } else {
        mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap);
    }
}
4769
# Delete snapshot $snap of volume $volid.
#
# The volume only counts as "in use" if the VM is running AND the volume is referenced by a drive
# of the VM's current configuration. If it is in use and do_snapshots_with_qemu() says so, QEMU
# deletes the snapshot; otherwise the storage layer does, and is told whether the volume is in use.
sub qemu_volume_snapshot_delete {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $in_use = check_running($vmid);

    if ($in_use) {
        # running alone is not enough, the volume must also be attached to the VM
        $in_use = undef;

        my $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->foreach_volume($conf, sub {
            my (undef, $drive) = @_;
            $in_use = 1 if $drive->{file} eq $volid;
        });
    }

    my $via_qemu = $in_use && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    if (!$via_qemu) {
        PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $in_use);
    } else {
        mon_cmd($vmid, 'blockdev-snapshot-delete-internal-sync', device => $deviceid, name => $snap);
    }
}
4791
# Set the migration capabilities of the running VM $vmid.
#
# Every capability reported by 'query-migrate-capabilities' is explicitly switched on or off:
# 'auto-converge' and 'xbzrle' are enabled, 'dirty-bitmaps' is enabled if 'query-proxmox-support'
# reports the matching PBS dirty-bitmap feature ('pbs-dirty-bitmap-savevm' if $savevm is true,
# 'pbs-dirty-bitmap-migration' otherwise), and everything else is disabled.
sub set_migration_caps {
    my ($vmid, $savevm) = @_;

    # errors of this query are ignored; the feature then counts as unsupported
    my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };

    my $bitmap_prop = $savevm ? 'pbs-dirty-bitmap-savevm' : 'pbs-dirty-bitmap-migration';
    my $dirty_bitmaps = $qemu_support->{$bitmap_prop} ? 1 : 0;

    my %enabled_cap = (
        "auto-converge" => 1,
        "xbzrle" => 1,
        "x-rdma-pin-all" => 0,
        "zero-blocks" => 0,
        "compress" => 0,
        "dirty-bitmaps" => $dirty_bitmaps,
    );

    my $supported_capabilities = mon_cmd($vmid, "query-migrate-capabilities");

    my @capabilities = map {
        my $name = $_->{capability};
        +{
            capability => $name,
            state => $enabled_cap{$name} ? JSON::true : JSON::false,
        };
    } @$supported_capabilities;

    mon_cmd($vmid, "migrate-set-capabilities", capabilities => \@capabilities);
}
4822
# Call $func->($volid, $attr, @param) once for every volume referenced by $conf or by any of its
# snapshots (including 'vmstate' and unused volumes).
#
# $attr is a hash with the attributes aggregated over all references to that volume:
#   cdrom                  - 1 only if every referencing drive is a CD-ROM
#   replicate              - 1 if any reference has replication enabled (the default)
#   shared                 - 1 if any reference is marked shared
#   referenced_in_config   - 1 if referenced by the current configuration
#   referenced_in_snapshot - hash of snapshot names referencing the volume
#   size                   - first non-zero size seen
#   is_vmstate, is_tpmstate, is_unused - 1 if referenced under such a key
#   drivename              - the (last seen) valid drive name referencing the volume
sub foreach_volid {
    my ($conf, $func, @param) = @_;

    my $volhash = {};

    my $record_volume = sub {
        my ($key, $drive, $snapname) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $attr = ($volhash->{$volid} //= {});

        $attr->{cdrom} //= 1;
        $attr->{cdrom} = 0 if !drive_is_cdrom($drive);

        $attr->{replicate} //= 0;
        $attr->{replicate} = 1 if ($drive->{replicate} // 1);

        $attr->{shared} //= 0;
        $attr->{shared} = 1 if $drive->{shared};

        $attr->{referenced_in_config} //= 0;
        if (defined($snapname)) {
            $attr->{referenced_in_snapshot}->{$snapname} = 1;
        } else {
            $attr->{referenced_in_config} = 1;
        }

        my $size = $drive->{size};
        $attr->{size} //= $size if $size;

        $attr->{is_vmstate} //= 0;
        $attr->{is_vmstate} = 1 if $key eq 'vmstate';

        $attr->{is_tpmstate} //= 0;
        $attr->{is_tpmstate} = 1 if $key eq 'tpmstate0';

        $attr->{is_unused} //= 0;
        $attr->{is_unused} = 1 if $key =~ /^unused\d+$/;

        $attr->{drivename} = $key if is_valid_drivename($key);
    };

    my $include_opts = {
        extra_keys => ['vmstate'],
        include_unused => 1,
    };

    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $record_volume);

    my $snapshots = $conf->{snapshots};
    for my $snapname (keys %{$snapshots}) {
        PVE::QemuConfig->foreach_volume_full(
            $snapshots->{$snapname}, $include_opts, $record_volume, $snapname);
    }

    for my $volid (keys %$volhash) {
        $func->($volid, $volhash->{$volid}, @param);
    }
}
4880
# Options which vmconfig_hotplug_pending() moves straight from the pending section into the
# configuration, without touching the running VM. All cloud-init options are included as well.
my $fast_plug_option = {
    map { $_ => 1 } qw(
        lock
        name
        onboot
        shares
        startup
        description
        protection
        vmstatestorage
        hookscript
        tags
    ),
};

$fast_plug_option->{$_} = 1 for keys %$confdesc_cloudinit;
4897
4898 # hotplug changes in [PENDING]
4899 # $selection hash can be used to only apply specified options, for
4900 # example: { cores => 1 } (only apply changed 'cores')
4901 # $errors ref is used to return error messages
4902 sub vmconfig_hotplug_pending {
4903 my ($vmid, $conf, $storecfg, $selection, $errors) = @_;
4904
4905 my $defaults = load_defaults();
4906 my $arch = get_vm_arch($conf);
4907 my $machine_type = get_vm_machine($conf, undef, $arch);
4908
4909 # commit values which do not have any impact on running VM first
4910 # Note: those option cannot raise errors, we we do not care about
4911 # $selection and always apply them.
4912
4913 my $add_error = sub {
4914 my ($opt, $msg) = @_;
4915 $errors->{$opt} = "hotplug problem - $msg";
4916 };
4917
4918 my $cloudinit_pending_properties = PVE::QemuServer::cloudinit_pending_properties();
4919
4920 my $cloudinit_record_changed = sub {
4921 my ($conf, $opt, $old, $new) = @_;
4922 return if !$cloudinit_pending_properties->{$opt};
4923
4924 my $ci = ($conf->{cloudinit} //= {});
4925
4926 my $recorded = $ci->{$opt};
4927 my %added = map { $_ => 1 } PVE::Tools::split_list(delete($ci->{added}) // '');
4928
4929 if (defined($new)) {
4930 if (defined($old)) {
4931 # an existing value is being modified
4932 if (defined($recorded)) {
4933 # the value was already not in sync
4934 if ($new eq $recorded) {
4935 # a value is being reverted to the cloud-init state:
4936 delete $ci->{$opt};
4937 delete $added{$opt};
4938 } else {
4939 # the value was changed multiple times, do nothing
4940 }
4941 } elsif ($added{$opt}) {
4942 # the value had been marked as added and is being changed, do nothing
4943 } else {
4944 # the value is new, record it:
4945 $ci->{$opt} = $old;
4946 }
4947 } else {
4948 # a new value is being added
4949 if (defined($recorded)) {
4950 # it was already not in sync
4951 if ($new eq $recorded) {
4952 # a value is being reverted to the cloud-init state:
4953 delete $ci->{$opt};
4954 delete $added{$opt};
4955 } else {
4956 # the value had temporarily been removed, do nothing
4957 }
4958 } elsif ($added{$opt}) {
4959 # the value had been marked as added already, do nothing
4960 } else {
4961 # the value is new, add it
4962 $added{$opt} = 1;
4963 }
4964 }
4965 } elsif (!defined($old)) {
4966 # a non-existent value is being removed? ignore...
4967 } else {
4968 # a value is being deleted
4969 if (defined($recorded)) {
4970 # a value was already recorded, just keep it
4971 } elsif ($added{$opt}) {
4972 # the value was marked as added, remove it
4973 delete $added{$opt};
4974 } else {
4975 # a previously unrecorded value is being removed, record the old value:
4976 $ci->{$opt} = $old;
4977 }
4978 }
4979
4980 my $added = join(',', sort keys %added);
4981 $ci->{added} = $added if length($added);
4982 };
4983
4984 my $changes = 0;
4985 foreach my $opt (keys %{$conf->{pending}}) { # add/change
4986 if ($fast_plug_option->{$opt}) {
4987 my $new = delete $conf->{pending}->{$opt};
4988 $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $new);
4989 $conf->{$opt} = $new;
4990 $changes = 1;
4991 }
4992 }
4993
4994 if ($changes) {
4995 PVE::QemuConfig->write_config($vmid, $conf);
4996 }
4997
4998 my $ostype = $conf->{ostype};
4999 my $version = extract_version($machine_type, get_running_qemu_version($vmid));
5000 my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
5001 my $usb_hotplug = $hotplug_features->{usb}
5002 && min_version($version, 7, 1)
5003 && defined($ostype) && ($ostype eq 'l26' || windows_version($ostype) > 7);
5004
5005 my $cgroup = PVE::QemuServer::CGroup->new($vmid);
5006 my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
5007
5008 foreach my $opt (sort keys %$pending_delete_hash) {
5009 next if $selection && !$selection->{$opt};
5010 my $force = $pending_delete_hash->{$opt}->{force};
5011 eval {
5012 if ($opt eq 'hotplug') {
5013 die "skip\n" if ($conf->{hotplug} =~ /memory/);
5014 } elsif ($opt eq 'tablet') {
5015 die "skip\n" if !$hotplug_features->{usb};
5016 if ($defaults->{tablet}) {
5017 vm_deviceplug($storecfg, $conf, $vmid, 'tablet', $arch, $machine_type);
5018 vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', $arch, $machine_type)
5019 if $arch eq 'aarch64';
5020 } else {
5021 vm_deviceunplug($vmid, $conf, 'tablet');
5022 vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
5023 }
5024 } elsif ($opt =~ m/^usb(\d+)$/) {
5025 my $index = $1;
5026 die "skip\n" if !$usb_hotplug;
5027 vm_deviceunplug($vmid, $conf, "usbredirdev$index"); # if it's a spice port
5028 vm_deviceunplug($vmid, $conf, $opt);
5029 } elsif ($opt eq 'vcpus') {
5030 die "skip\n" if !$hotplug_features->{cpu};
5031 qemu_cpu_hotplug($vmid, $conf, undef);
5032 } elsif ($opt eq 'balloon') {
5033 # enable balloon device is not hotpluggable
5034 die "skip\n" if defined($conf->{balloon}) && $conf->{balloon} == 0;
5035 # here we reset the ballooning value to memory
5036 my $balloon = $conf->{memory} || $defaults->{memory};
5037 mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
5038 } elsif ($fast_plug_option->{$opt}) {
5039 # do nothing
5040 } elsif ($opt =~ m/^net(\d+)$/) {
5041 die "skip\n" if !$hotplug_features->{network};
5042 vm_deviceunplug($vmid, $conf, $opt);
5043 } elsif (is_valid_drivename($opt)) {
5044 die "skip\n" if !$hotplug_features->{disk} || $opt =~ m/(ide|sata)(\d+)/;
5045 vm_deviceunplug($vmid, $conf, $opt);
5046 vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
5047 } elsif ($opt =~ m/^memory$/) {
5048 die "skip\n" if !$hotplug_features->{memory};
5049 PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt);
5050 } elsif ($opt eq 'cpuunits') {
5051 $cgroup->change_cpu_shares(undef);
5052 } elsif ($opt eq 'cpulimit') {
5053 $cgroup->change_cpu_quota(undef, undef); # reset, cgroup module can better decide values
5054 } else {
5055 die "skip\n";
5056 }
5057 };
5058 if (my $err = $@) {
5059 &$add_error($opt, $err) if $err ne "skip\n";
5060 } else {
5061 my $old = delete $conf->{$opt};
5062 $cloudinit_record_changed->($conf, $opt, $old, undef);
5063 PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
5064 }
5065 }
5066
5067 my $cloudinit_opt;
5068 foreach my $opt (keys %{$conf->{pending}}) {
5069 next if $selection && !$selection->{$opt};
5070 my $value = $conf->{pending}->{$opt};
5071 eval {
5072 if ($opt eq 'hotplug') {
5073 die "skip\n" if ($value =~ /memory/) || ($value !~ /memory/ && $conf->{hotplug} =~ /memory/);
5074 } elsif ($opt eq 'tablet') {
5075 die "skip\n" if !$hotplug_features->{usb};
5076 if ($value == 1) {
5077 vm_deviceplug($storecfg, $conf, $vmid, 'tablet', $arch, $machine_type);
5078 vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', $arch, $machine_type)
5079 if $arch eq 'aarch64';
5080 } elsif ($value == 0) {
5081 vm_deviceunplug($vmid, $conf, 'tablet');
5082 vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
5083 }
5084 } elsif ($opt =~ m/^usb(\d+)$/) {
5085 my $index = $1;
5086 die "skip\n" if !$usb_hotplug;
5087 my $d = eval { parse_property_string($usbdesc->{format}, $value) };
5088 my $id = $opt;
5089 if ($d->{host} eq 'spice') {
5090 $id = "usbredirdev$index";
5091 }
5092 qemu_usb_hotplug($storecfg, $conf, $vmid, $id, $d, $arch, $machine_type);
5093 } elsif ($opt eq 'vcpus') {
5094 die "skip\n" if !$hotplug_features->{cpu};
5095 qemu_cpu_hotplug($vmid, $conf, $value);
5096 } elsif ($opt eq 'balloon') {
5097 # enabling/disabling the ballooning device is not hotpluggable
5098 my $old_balloon_enabled = !!(!defined($conf->{balloon}) || $conf->{balloon});
5099 my $new_balloon_enabled = !!(!defined($conf->{pending}->{balloon}) || $conf->{pending}->{balloon});
5100 die "skip\n" if $old_balloon_enabled != $new_balloon_enabled;
5101
5102 # allow manual ballooning if shares is set to zero
5103 if ((defined($conf->{shares}) && ($conf->{shares} == 0))) {
5104 my $balloon = $conf->{pending}->{balloon} || $conf->{memory} || $defaults->{memory};
5105 mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
5106 }
5107 } elsif ($opt =~ m/^net(\d+)$/) {
5108 # some changes can be done without hotplug
5109 vmconfig_update_net($storecfg, $conf, $hotplug_features->{network},
5110 $vmid, $opt, $value, $arch, $machine_type);
5111 } elsif (is_valid_drivename($opt)) {
5112 die "skip\n" if $opt eq 'efidisk0' || $opt eq 'tpmstate0';
5113 # some changes can be done without hotplug
5114 my $drive = parse_drive($opt, $value);
5115 if (drive_is_cloudinit($drive)) {
5116 $cloudinit_opt = [$opt, $drive];
5117 # apply all the other changes first, then generate the cloudinit disk
5118 die "skip\n";
5119 }
5120 vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
5121 $vmid, $opt, $value, $arch, $machine_type);
5122 } elsif ($opt =~ m/^memory$/) { #dimms
5123 die "skip\n" if !$hotplug_features->{memory};
5124 $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt, $value);
5125 } elsif ($opt eq 'cpuunits') {
5126 my $new_cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{pending}->{$opt}); #clamp
5127 $cgroup->change_cpu_shares($new_cpuunits);
5128 } elsif ($opt eq 'cpulimit') {
5129 my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000);
5130 $cgroup->change_cpu_quota($cpulimit, 100000);
5131 } elsif ($opt eq 'agent') {
5132 vmconfig_update_agent($conf, $opt, $value);
5133 } else {
5134 die "skip\n"; # skip non-hot-pluggable options
5135 }
5136 };
5137 if (my $err = $@) {
5138 &$add_error($opt, $err) if $err ne "skip\n";
5139 } else {
5140 $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $value);
5141 $conf->{$opt} = $value;
5142 delete $conf->{pending}->{$opt};
5143 }
5144 }
5145
5146 if (defined($cloudinit_opt)) {
5147 my ($opt, $drive) = @$cloudinit_opt;
5148 my $value = $conf->{pending}->{$opt};
5149 eval {
5150 my $temp = {%$conf, $opt => $value};
5151 PVE::QemuServer::Cloudinit::apply_cloudinit_config($temp, $vmid);
5152 vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
5153 $vmid, $opt, $value, $arch, $machine_type);
5154 };
5155 if (my $err = $@) {
5156 &$add_error($opt, $err) if $err ne "skip\n";
5157 } else {
5158 $conf->{$opt} = $value;
5159 delete $conf->{pending}->{$opt};
5160 }
5161 }
5162
5163 # unplug xhci controller if no usb device is left
5164 if ($usb_hotplug) {
5165 my $has_usb = 0;
5166 for (my $i = 0; $i < $MAX_USB_DEVICES; $i++) {
5167 next if !defined($conf->{"usb$i"});
5168 $has_usb = 1;
5169 last;
5170 }
5171 if (!$has_usb) {
5172 vm_deviceunplug($vmid, $conf, 'xhci');
5173 }
5174 }
5175
5176 PVE::QemuConfig->write_config($vmid, $conf);
5177
5178 if ($hotplug_features->{cloudinit} && PVE::QemuServer::Cloudinit::has_changes($conf)) {
5179 PVE::QemuServer::vmconfig_update_cloudinit_drive($storecfg, $conf, $vmid);
5180 }
5181 }
5182
# Free the volume referenced by $drive when that is requested and allowed.
#
# Nothing happens unless $force is set or $key starts with 'unused', and
# drives for which drive_is_cdrom($drive, 1) is true are never touched.
#
# Returns 1 if the volume was freed or if this VM is not its owner (in which
# case only the config entry is meant to go away), and nothing otherwise.
# Dies if the volume is still in use; the permission check via $rpcenv->check
# is presumably fatal as well when the privilege is missing.
sub try_deallocate_drive {
    my ($storecfg, $vmid, $conf, $key, $drive, $rpcenv, $authuser, $force) = @_;

    my $removal_requested = $force || $key =~ /^unused/;
    return if !$removal_requested;
    return if drive_is_cdrom($drive, 1);

    my $volid = $drive->{file};

    # If vm is not owner of this disk remove from config
    return 1 if !vm_is_volid_owner($storecfg, $vmid, $volid);

    # called in scalar context on purpose, exactly like the storage ID lookup
    # has always been done here
    my $storeid = PVE::Storage::parse_volume_id($volid);
    $rpcenv->check($authuser, "/storage/$storeid", ['Datastore.AllocateSpace']);

    # check if the disk is really unused
    if (PVE::QemuServer::Drive::is_volume_in_use($storecfg, $conf, $key, $volid)) {
        die "unable to delete '$volid' - volume is still in use (snapshot?)\n";
    }

    PVE::Storage::vdisk_free($storecfg, $volid);
    return 1;
}
5205
# Remove the drive configured as $opt from a VM.
#
# With $force the caller needs 'VM.Config.Disk' on the VM and the volume is
# handed to try_deallocate_drive(); without it the drive is only registered
# as an unused volume via vmconfig_register_unused_drive().
sub vmconfig_delete_or_detach_drive {
    my ($vmid, $storecfg, $conf, $opt, $force) = @_;

    my $drive = parse_drive($opt, $conf->{$opt});

    my $rpcenv = PVE::RPCEnvironment::get();
    my $authuser = $rpcenv->get_user();

    if (!$force) {
        # detach only: keep the volume around as 'unusedN'
        return vmconfig_register_unused_drive($storecfg, $vmid, $conf, $drive);
    }

    $rpcenv->check_vm_perm($authuser, $vmid, undef, ['VM.Config.Disk']);
    return try_deallocate_drive($storecfg, $vmid, $conf, $opt, $drive, $rpcenv, $authuser, $force);
}
5221
5222
5223
# Apply all pending config changes of a (stopped) VM to $conf and write it.
#
# Parameters:
#   $vmid, $conf, $storecfg - the guest, its config hash and the storage config
#   $errors                 - hash ref, receives one message per option that
#                             could not be applied (keyed by option name)
#   $skip_cloud_init        - if true, never regenerate the cloud-init image
#
# Pending deletions are processed first, then additions/changes. Options that
# fail stay pending and are reported through $errors and warn(). Returns
# immediately if nothing is pending.
sub vmconfig_apply_pending {
    my ($vmid, $conf, $storecfg, $errors, $skip_cloud_init) = @_;

    return if scalar(keys %{$conf->{pending}}) == 0;

    my $record_error = sub {
        my ($opt, $msg) = @_;
        my $text = "unable to apply pending change $opt : $msg";
        $errors->{$opt} = $text;
        warn $text;
    };

    # cold plug

    my $deletions = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    for my $opt (sort keys %$deletions) {
        my $force = $deletions->{$opt}->{force};
        eval {
            die "internal error" if $opt =~ m/^unused/;
            if (defined($conf->{$opt}) && is_valid_drivename($opt)) {
                vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
            }
        };
        if (my $err = $@) {
            $record_error->($opt, $err);
            next;
        }

        PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        delete $conf->{$opt};
    }

    PVE::QemuConfig->cleanup_pending($conf);

    # 0 = never generate, undef = not needed so far, 1 = a cloud-init drive was applied
    my $generate_cloudinit = $skip_cloud_init ? 0 : undef;

    # add/change; 'delete' is skipped just to be sure
    for my $opt (grep { $_ ne 'delete' } keys %{$conf->{pending}}) {
        eval {
            # a drive that gets replaced is kept as unused volume
            vmconfig_register_unused_drive($storecfg, $vmid, $conf, parse_drive($opt, $conf->{$opt}))
                if defined($conf->{$opt}) && is_valid_drivename($opt);
        };
        if (my $err = $@) {
            $record_error->($opt, $err);
            next;
        }

        if (is_valid_drivename($opt)) {
            my $new_drive = parse_drive($opt, $conf->{pending}->{$opt});
            if (drive_is_cloudinit($new_drive)) {
                $generate_cloudinit //= 1;
            }
        }

        $conf->{$opt} = delete $conf->{pending}->{$opt};
    }

    # write all changes at once to avoid unnecessary i/o
    PVE::QemuConfig->write_config($vmid, $conf);

    # After successful generation and if there were changes to be applied, update the
    # config to drop the {cloudinit} entry.
    if ($generate_cloudinit && PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid)) {
        PVE::QemuConfig->write_config($vmid, $conf);
    }
}
5290
# Apply a new value for network device $opt to a running VM.
#
# If the device already exists and only bridge, tag, trunks, firewall, rate or
# link state differ, the change is applied to the existing tap device and 1 is
# returned. A change of model, MAC address or queues (or a missing bridge on
# either side) needs an unplug followed by a fresh plug. Without $hotplug
# such changes, as well as adding a new device, die with "skip\n".
sub vmconfig_update_net {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $newnet = parse_net($value);

    if ($conf->{$opt}) {
        my $oldnet = parse_net($conf->{$opt});

        my $needs_replug = safe_string_ne($oldnet->{model}, $newnet->{model})
            || safe_string_ne($oldnet->{macaddr}, $newnet->{macaddr})
            || safe_num_ne($oldnet->{queues}, $newnet->{queues})
            || !$newnet->{bridge}
            || !$oldnet->{bridge}; # bridge/nat mode change

        if (!$needs_replug) {
            my ($index) = $opt =~ m/net(\d+)/;
            die "internal error" if !defined($index);
            my $iface = "tap${vmid}i${index}";

            my $plug_changed = safe_string_ne($oldnet->{bridge}, $newnet->{bridge})
                || safe_num_ne($oldnet->{tag}, $newnet->{tag})
                || safe_string_ne($oldnet->{trunks}, $newnet->{trunks})
                || safe_num_ne($oldnet->{firewall}, $newnet->{firewall});

            if ($plug_changed) {
                PVE::Network::tap_unplug($iface);

                my @plug_args = (
                    $iface,
                    $newnet->{bridge},
                    $newnet->{tag},
                    $newnet->{firewall},
                    $newnet->{trunks},
                    $newnet->{rate},
                );
                if ($have_sdn) {
                    PVE::Network::SDN::Zones::tap_plug(@plug_args);
                } else {
                    PVE::Network::tap_plug(@plug_args);
                }
            } elsif (safe_num_ne($oldnet->{rate}, $newnet->{rate})) {
                # Rate can be applied on its own but any change above needs to
                # include the rate in tap_plug since OVS resets everything.
                PVE::Network::tap_rate_limit($iface, $newnet->{rate});
            }

            qemu_set_link_status($vmid, $opt, !$newnet->{link_down})
                if safe_string_ne($oldnet->{link_down}, $newnet->{link_down});

            return 1;
        }

        # for non online change, we try to hot-unplug
        die "skip\n" if !$hotplug;
        vm_deviceunplug($vmid, $conf, $opt);
    }

    die "skip\n" if !$hotplug;
    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet, $arch, $machine_type);
}
5343
# Decide whether a changed guest agent property string can be applied live.
#
# Dies with "skip\n" if no agent is configured yet or if any sub-option other
# than the hot-pluggable ones differs between the current and the new value.
# Returns nothing when the change is acceptable, i.e. either no actual change
# (e.g., format string reordered) or just hotpluggable changes.
sub vmconfig_update_agent {
    my ($conf, $opt, $value) = @_;

    die "skip\n" if !$conf->{$opt};

    # sub-options that may change while the VM is running
    my %is_hotpluggable = (fstrim_cloned_disks => 1);

    my $current = parse_guest_agent($conf);
    my $wanted = parse_guest_agent({$opt => $value});

    # added/changed options
    for my $name (grep { !defined($is_hotpluggable{$_}) } keys %$wanted) {
        die "skip\n" if safe_string_ne($wanted->{$name}, $current->{$name});
    }

    # removed options
    for my $name (grep { !defined($is_hotpluggable{$_}) } keys %$current) {
        die "skip\n" if safe_string_ne($current->{$name}, $wanted->{$name});
    }

    return;
}
5366
# Apply a new value for drive $opt to a running VM.
#
# Existing CD-ROM drives get their medium ejected/changed. Existing disks
# pointing to the same volume only get their I/O throttle limits updated,
# while a change of discard, iothread, queues, cache, ssd or ro dies with
# "skip\n". A disk pointing to a different volume is unplugged, registered as
# unused and then plugged again like a new disk. Plugging dies with "skip\n"
# unless $hotplug is set, and always for ide/sata drives.
#
# Returns 1 when the change was handled in place.
sub vmconfig_update_disk {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $drive = parse_drive($opt, $value);
    my $old_drive = $conf->{$opt} ? parse_drive($opt, $conf->{$opt}) : undef;

    if ($old_drive) {
        my $media = $drive->{media} || 'disk';
        my $oldmedia = $old_drive->{media} || 'disk';
        die "unable to change media type\n" if $media ne $oldmedia;

        if (drive_is_cdrom($old_drive)) {
            if ($drive->{file} ne 'none') {
                my $path = get_iso_path($storecfg, $vmid, $drive->{file});

                # force eject if locked
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");

                mon_cmd($vmid, "blockdev-change-medium", id => "$opt", filename => "$path")
                    if $path;
            } else {
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive)
                    if drive_is_cloudinit($old_drive);
            }

            return 1;
        }

        if ($drive->{file} eq $old_drive->{file}) {
            # update existing disk

            # skip non hotpluggable value
            my @cold_only = qw(discard iothread queues cache ssd ro);
            die "skip\n"
                if first { safe_string_ne($drive->{$_}, $old_drive->{$_}) } @cold_only;

            # throttle settings, grouped in the order the monitor helper expects them
            my @rate_mb = qw(mbps mbps_rd mbps_wr);
            my @rate_io = qw(iops iops_rd iops_wr);
            my @burst_mb = qw(mbps_max mbps_rd_max mbps_wr_max);
            my @burst_io = qw(iops_max iops_rd_max iops_wr_max);
            my @burst_len = qw(
                bps_max_length bps_rd_max_length bps_wr_max_length
                iops_max_length iops_rd_max_length iops_wr_max_length
            );
            my @throttle_keys = (@rate_mb, @rate_io, @burst_mb, @burst_io, @burst_len);

            # apply throttle
            if (first { safe_num_ne($drive->{$_}, $old_drive->{$_}) } @throttle_keys) {
                qemu_block_set_io_throttle(
                    $vmid, "drive-$opt",
                    (map { ($drive->{$_} || 0) * 1024 * 1024 } @rate_mb),
                    (map { ($drive->{$_} || 0) } @rate_io),
                    (map { ($drive->{$_} || 0) * 1024 * 1024 } @burst_mb),
                    (map { ($drive->{$_} || 0) } @burst_io),
                    (map { ($drive->{$_} || 1) } @burst_len),
                );
            }

            return 1;
        }

        die "skip\n" if !$hotplug;

        # unplug and register as unused
        vm_deviceunplug($vmid, $conf, $opt);
        vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
    }

    die "skip\n" if !$hotplug || $opt =~ m/(ide|sata)(\d+)/;

    # hotplug new disks
    PVE::Storage::activate_volumes($storecfg, [$drive->{file}]) if $drive->{file} !~ m|^/dev/.+|;
    vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive, $arch, $machine_type);
}
5475
# Regenerate the cloud-init image of a VM and, if the VM is running, swap the
# medium of its cloud-init drive for the regenerated image.
#
# Does nothing when the config has no cloud-init drive. If several volumes
# qualify, the last one visited by foreach_volume is used. The config is
# written again when apply_cloudinit_config() returns a true value.
sub vmconfig_update_cloudinit_drive {
    my ($storecfg, $conf, $vmid) = @_;

    my ($ci_key, $ci_drive);
    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($key, $drive) = @_;
        if (PVE::QemuServer::drive_is_cloudinit($drive)) {
            ($ci_key, $ci_drive) = ($key, $drive);
        }
    });

    return if !$ci_drive;

    PVE::QemuConfig->write_config($vmid, $conf)
        if PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid);

    if (PVE::QemuServer::check_running($vmid)) {
        my $iso_path = PVE::Storage::path($storecfg, $ci_drive->{file});
        if ($iso_path) {
            mon_cmd($vmid, "eject", force => JSON::true, id => "$ci_key");
            mon_cmd($vmid, "blockdev-change-medium", id => "$ci_key", filename => "$iso_path");
        }
    }
}
5506
# called in locked context by incoming migration
#
# Collects the volumes of $conf that are candidates for an NBD export:
# everything except CD-ROM drives, 'tpmstate0', drives without a volume and
# volumes on shared storages.
#
# Returns a hash ref mapping the drive key to
#   [$volid, $storeid, $volname, $drive, $use_existing]
# where $use_existing is 1 if the volid is listed in $replicated_volumes.
sub vm_migrate_get_nbd_disks {
    my ($storecfg, $conf, $replicated_volumes) = @_;

    my %local_volumes;

    my $collect = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive) || $ds eq 'tpmstate0';

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid);

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        return if $scfg->{shared};

        # replicated disks re-use existing state via bitmap
        my $use_existing = $replicated_volumes->{$volid} ? 1 : 0;

        $local_volumes{$ds} = [$volid, $storeid, $volname, $drive, $use_existing];
    };

    PVE::QemuConfig->foreach_volume($conf, $collect);

    return \%local_volumes;
}
5533
# called in locked context by incoming migration
#
# Allocates the target volumes for an NBD based storage migration.
#
# Parameters:
#   $storecfg       - storage configuration
#   $vmid           - ID of the guest the new volumes get allocated for
#   $source_volumes - hash ref: drive key =>
#                     [$volid, $storeid, $volname, $drive, $use_existing, $format]
#                     ($format is optional, it is an explicitly requested format)
#   $storagemap     - parsed storage map; with {identity} set no mapping is done
#
# Returns a hash ref: drive key => { drivestr, volid } for newly allocated
# volumes, or { drivestr, volid, replicated => 1 } for re-used ones.
#
# The drive hashes passed in via $source_volumes are not modified.
sub vm_migrate_alloc_nbd_disks {
    my ($storecfg, $vmid, $source_volumes, $storagemap) = @_;

    my $nbd = {};
    foreach my $opt (sort keys %$source_volumes) {
        my ($volid, $storeid, $volname, $drive, $use_existing, $format) = @{$source_volumes->{$opt}};

        if ($use_existing) {
            $nbd->{$opt}->{drivestr} = print_drive($drive);
            $nbd->{$opt}->{volid} = $volid;
            $nbd->{$opt}->{replicated} = 1;
            next;
        }

        # storage mapping + volname = regular migration
        # storage mapping + format = remote migration
        # order of precedence, filtered by whether storage supports it:
        # 1. explicit requested format
        # 2. format of current volume
        # 3. default format of storage
        if (!$storagemap->{identity}) {
            $storeid = PVE::JSONSchema::map_id($storagemap, $storeid);
            my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
            if (!$format || !grep { $format eq $_ } @$validFormats) {
                # Forget a requested format the target storage cannot handle,
                # otherwise it would still be defined below and the fallback
                # to the storage default would never kick in.
                $format = undef;

                if ($volname) {
                    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
                    my $fileFormat = qemu_img_format($scfg, $volname);
                    $format = $fileFormat
                        if grep { $fileFormat eq $_ } @$validFormats;
                }
                $format //= $defFormat;
            }
        } else {
            # can't happen for remote migration, so $volname is always defined
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $format = qemu_img_format($scfg, $volname);
        }

        my $size = $drive->{size} / 1024;
        my $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $format, undef, $size);

        # work on a shallow copy, so the caller's drive hash keeps describing
        # the source volume
        my $newdrive = { %$drive };
        $newdrive->{format} = $format;
        $newdrive->{file} = $newvolid;

        $nbd->{$opt}->{drivestr} = print_drive($newdrive);
        $nbd->{$opt}->{volid} = $newvolid;
    }

    return $nbd;
}
5585
# Start a VM while holding its config lock.
#
# see vm_start_nolock for parameters, additionally:
# migrate_opts:
#   storagemap = parsed storage map for allocating NBD disks
#
# If the VM is already running and holds a 'backup' lock, it is resumed in the
# existing QEMU instance instead. Sets $params->{resume} when the config has a
# 'suspended' lock or a vmstate entry. Dies for templates (unless
# $params->{skiptemplate}) and if the VM is already running.
sub vm_start {
    my ($storecfg, $vmid, $params, $migrate_opts) = @_;

    my $start_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid, $migrate_opts->{migratedfrom});

        if (!$params->{skiptemplate} && PVE::QemuConfig->is_template($conf)) {
            die "you can't start a vm if it's a template\n";
        }

        my $is_suspended = PVE::QemuConfig->has_lock($conf, 'suspended');
        my $in_backup = PVE::QemuConfig->has_lock($conf, 'backup');

        my $running = check_running($vmid, undef, $migrate_opts->{migratedfrom});

        # a backup is currently running, attempt to start the guest in the
        # existing QEMU instance
        return vm_resume($vmid) if $in_backup && $running;

        if (!$params->{skiplock} && !$is_suspended) {
            PVE::QemuConfig->check_lock($conf);
        }

        $params->{resume} = $is_suspended || defined($conf->{vmstate});

        die "VM $vmid already running\n" if $running;

        my $storagemap = $migrate_opts->{storagemap};
        if ($storagemap) {
            my $replicated = $migrate_opts->{replicated_volumes};
            my $disks = vm_migrate_get_nbd_disks($storecfg, $conf, $replicated);
            my $nbd = vm_migrate_alloc_nbd_disks($storecfg, $vmid, $disks, $storagemap);
            $migrate_opts->{nbd} = $nbd;

            # start with the freshly allocated target volumes
            for my $opt (keys %$nbd) {
                $conf->{$opt} = $nbd->{$opt}->{drivestr};
            }
        }

        return vm_start_nolock($storecfg, $vmid, $conf, $params, $migrate_opts);
    };

    return PVE::QemuConfig->lock_config($vmid, $start_locked);
}
5629
5630
5631 # params:
5632 # statefile => 'tcp', 'unix' for migration or path/volid for RAM state
5633 # skiplock => 0/1, skip checking for config lock
5634 # skiptemplate => 0/1, skip checking whether VM is template
5635 # forcemachine => to force QEMU machine (rollback/migration)
5636 # forcecpu => a QEMU '-cpu' argument string to override get_cpu_options
5637 # timeout => in seconds
5638 # paused => start VM in paused state (backup)
5639 # resume => resume from hibernation
5640 # pbs-backing => {
5641 # sata0 => {
5642 # repository
5643 # snapshot
5644 # keyfile
5645 # archive
5646 # },
5647 # virtio2 => ...
5648 # }
5649 # migrate_opts:
5650 # nbd => volumes for NBD exports (vm_migrate_alloc_nbd_disks)
5651 # migratedfrom => source node
5652 # spice_ticket => used for spice migration, passed via tunnel/stdin
5653 # network => CIDR of migration network
5654 # type => secure/insecure - tunnel over encrypted connection or plain-text
5655 # nbd_proto_version => int, 0 for TCP, 1 for UNIX
5656 # replicated_volumes => which volids should be re-used with bitmaps for nbd migration
5657 # offline_volumes => new volids of offline migrated disks like tpmstate and cloudinit, not yet
5658 # contained in config
5659 sub vm_start_nolock {
5660 my ($storecfg, $vmid, $conf, $params, $migrate_opts) = @_;
5661
5662 my $statefile = $params->{statefile};
5663 my $resume = $params->{resume};
5664
5665 my $migratedfrom = $migrate_opts->{migratedfrom};
5666 my $migration_type = $migrate_opts->{type};
5667
5668 my $res = {};
5669
5670 # clean up leftover reboot request files
5671 eval { clear_reboot_request($vmid); };
5672 warn $@ if $@;
5673
5674 if (!$statefile && scalar(keys %{$conf->{pending}})) {
5675 vmconfig_apply_pending($vmid, $conf, $storecfg);
5676 $conf = PVE::QemuConfig->load_config($vmid); # update/reload
5677 }
5678
5679 # don't regenerate the ISO if the VM is started as part of a live migration
5680 # this way we can reuse the old ISO with the correct config
5681 if (!$migratedfrom) {
5682 if (PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid)) {
5683 # FIXME: apply_cloudinit_config updates $conf in this case, and it would only drop
5684 # $conf->{cloudinit}, so we could just not do this?
5685 # But we do it above, so for now let's be consistent.
5686 $conf = PVE::QemuConfig->load_config($vmid); # update/reload
5687 }
5688 }
5689
5690 # override offline migrated volumes, conf is out of date still
5691 if (my $offline_volumes = $migrate_opts->{offline_volumes}) {
5692 for my $key (sort keys $offline_volumes->%*) {
5693 my $parsed = parse_drive($key, $conf->{$key});
5694 $parsed->{file} = $offline_volumes->{$key};
5695 $conf->{$key} = print_drive($parsed);
5696 }
5697 }
5698
5699 my $defaults = load_defaults();
5700
5701 # set environment variable useful inside network script
5702 # for remote migration the config is available on the target node!
5703 if (!$migrate_opts->{remote_node}) {
5704 $ENV{PVE_MIGRATED_FROM} = $migratedfrom;
5705 }
5706
5707 PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1);
5708
5709 my $forcemachine = $params->{forcemachine};
5710 my $forcecpu = $params->{forcecpu};
5711 if ($resume) {
5712 # enforce machine and CPU type on suspended vm to ensure HW compatibility
5713 $forcemachine = $conf->{runningmachine};
5714 $forcecpu = $conf->{runningcpu};
5715 print "Resuming suspended VM\n";
5716 }
5717
5718 my ($cmd, $vollist, $spice_port) = config_to_command($storecfg, $vmid,
5719 $conf, $defaults, $forcemachine, $forcecpu, $params->{'pbs-backing'});
5720
5721 my $migration_ip;
5722 my $get_migration_ip = sub {
5723 my ($nodename) = @_;
5724
5725 return $migration_ip if defined($migration_ip);
5726
5727 my $cidr = $migrate_opts->{network};
5728
5729 if (!defined($cidr)) {
5730 my $dc_conf = PVE::Cluster::cfs_read_file('datacenter.cfg');
5731 $cidr = $dc_conf->{migration}->{network};
5732 }
5733
5734 if (defined($cidr)) {
5735 my $ips = PVE::Network::get_local_ip_from_cidr($cidr);
5736
5737 die "could not get IP: no address configured on local " .
5738 "node for network '$cidr'\n" if scalar(@$ips) == 0;
5739
5740 die "could not get IP: multiple addresses configured on local " .
5741 "node for network '$cidr'\n" if scalar(@$ips) > 1;
5742
5743 $migration_ip = @$ips[0];
5744 }
5745
5746 $migration_ip = PVE::Cluster::remote_node_ip($nodename, 1)
5747 if !defined($migration_ip);
5748
5749 return $migration_ip;
5750 };
5751
5752 if ($statefile) {
5753 if ($statefile eq 'tcp') {
5754 my $migrate = $res->{migrate} = { proto => 'tcp' };
5755 $migrate->{addr} = "localhost";
5756 my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg');
5757 my $nodename = nodename();
5758
5759 if (!defined($migration_type)) {
5760 if (defined($datacenterconf->{migration}->{type})) {
5761 $migration_type = $datacenterconf->{migration}->{type};
5762 } else {
5763 $migration_type = 'secure';
5764 }
5765 }
5766
5767 if ($migration_type eq 'insecure') {
5768 $migrate->{addr} = $get_migration_ip->($nodename);
5769 $migrate->{addr} = "[$migrate->{addr}]" if Net::IP::ip_is_ipv6($migrate->{addr});
5770 }
5771
5772 my $pfamily = PVE::Tools::get_host_address_family($nodename);
5773 $migrate->{port} = PVE::Tools::next_migrate_port($pfamily);
5774 $migrate->{uri} = "tcp:$migrate->{addr}:$migrate->{port}";
5775 push @$cmd, '-incoming', $migrate->{uri};
5776 push @$cmd, '-S';
5777
5778 } elsif ($statefile eq 'unix') {
5779 # should be the default for secure migrations, as an SSH TCP forward
5780 # tunnel is not reliably ready within a deterministic time and regularly
5781 # fails to get set up in time, so use UNIX socket forwards
5782 my $migrate = $res->{migrate} = { proto => 'unix' };
5783 $migrate->{addr} = "/run/qemu-server/$vmid.migrate";
5784 unlink $migrate->{addr};
5785
5786 $migrate->{uri} = "unix:$migrate->{addr}";
5787 push @$cmd, '-incoming', $migrate->{uri};
5788 push @$cmd, '-S';
5789
5790 } elsif (-e $statefile) {
5791 push @$cmd, '-loadstate', $statefile;
5792 } else {
5793 my $statepath = PVE::Storage::path($storecfg, $statefile);
5794 push @$vollist, $statefile;
5795 push @$cmd, '-loadstate', $statepath;
5796 }
5797 } elsif ($params->{paused}) {
5798 push @$cmd, '-S';
5799 }
5800
5801 my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume);
5802
5803 my $pci_devices = {}; # host pci devices
5804 for (my $i = 0; $i < $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES; $i++) {
5805 my $dev = $conf->{"hostpci$i"} or next;
5806 $pci_devices->{$i} = parse_hostpci($dev);
5807 }
5808
5809 # do not reserve pciid for mediated devices, sysfs will error out for duplicate assignment
5810 my $real_pci_devices = [ grep { !(defined($_->{mdev}) && scalar($_->{pciid}->@*) == 1) } values $pci_devices->%* ];
5811
5812 # map to a flat list of pci ids
5813 my $pci_id_list = [ map { $_->{id} } map { $_->{pciid}->@* } $real_pci_devices->@* ];
5814
5815 # reserve all PCI IDs before actually doing anything with them
5816 PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, $start_timeout);
5817
5818 eval {
5819 my $uuid;
5820 for my $id (sort keys %$pci_devices) {
5821 my $d = $pci_devices->{$id};
5822 for my $dev ($d->{pciid}->@*) {
5823 my $info = PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $id, $d->{mdev});
5824
5825 # nvidia grid needs the uuid of the mdev as qemu parameter
5826 if ($d->{mdev} && !defined($uuid) && $info->{vendor} eq '10de') {
5827 $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $id);
5828 }
5829 }
5830 }
5831 push @$cmd, '-uuid', $uuid if defined($uuid);
5832 };
5833 if (my $err = $@) {
5834 eval { cleanup_pci_devices($vmid, $conf) };
5835 warn $@ if $@;
5836 die $err;
5837 }
5838
5839 PVE::Storage::activate_volumes($storecfg, $vollist);
5840
5841 eval {
5842 run_command(['/bin/systemctl', 'stop', "$vmid.scope"], outfunc => sub{}, errfunc => sub{});
5843 };
5844 # Issues with the above 'stop' not being fully completed are extremely rare, a very low
5845 # timeout should be more than enough here...
5846 PVE::Systemd::wait_for_unit_removed("$vmid.scope", 20);
5847
5848 my $cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{cpuunits});
5849
5850 my %run_params = (
5851 timeout => $statefile ? undef : $start_timeout,
5852 umask => 0077,
5853 noerr => 1,
5854 );
5855
5856 # when migrating, prefix QEMU output so other side can pick up any
5857 # errors that might occur and show the user
5858 if ($migratedfrom) {
5859 $run_params{quiet} = 1;
5860 $run_params{logfunc} = sub { print "QEMU: $_[0]\n" };
5861 }
5862
5863 my %systemd_properties = (
5864 Slice => 'qemu.slice',
5865 KillMode => 'process',
5866 SendSIGKILL => 0,
5867 TimeoutStopUSec => ULONG_MAX, # infinity
5868 );
5869
5870 if (PVE::CGroup::cgroup_mode() == 2) {
5871 $systemd_properties{CPUWeight} = $cpuunits;
5872 } else {
5873 $systemd_properties{CPUShares} = $cpuunits;
5874 }
5875
5876 if (my $cpulimit = $conf->{cpulimit}) {
5877 $systemd_properties{CPUQuota} = int($cpulimit * 100);
5878 }
5879 $systemd_properties{timeout} = 10 if $statefile; # setting up the scope shoul be quick
5880
5881 my $run_qemu = sub {
5882 PVE::Tools::run_fork sub {
5883 PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);
5884
5885 my $tpmpid;
5886 if (my $tpm = $conf->{tpmstate0}) {
5887 # start the TPM emulator so QEMU can connect on start
5888 $tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom);
5889 }
5890
5891 my $exitcode = run_command($cmd, %run_params);
5892 if ($exitcode) {
5893 if ($tpmpid) {
5894 warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n";
5895 kill 'TERM', $tpmpid;
5896 }
5897 die "QEMU exited with code $exitcode\n";
5898 }
5899 };
5900 };
5901
5902 if ($conf->{hugepages}) {
5903
5904 my $code = sub {
5905 my $hugepages_topology = PVE::QemuServer::Memory::hugepages_topology($conf);
5906 my $hugepages_host_topology = PVE::QemuServer::Memory::hugepages_host_topology();
5907
5908 PVE::QemuServer::Memory::hugepages_mount();
5909 PVE::QemuServer::Memory::hugepages_allocate($hugepages_topology, $hugepages_host_topology);
5910
5911 eval { $run_qemu->() };
5912 if (my $err = $@) {
5913 PVE::QemuServer::Memory::hugepages_reset($hugepages_host_topology)
5914 if !$conf->{keephugepages};
5915 die $err;
5916 }
5917
5918 PVE::QemuServer::Memory::hugepages_pre_deallocate($hugepages_topology)
5919 if !$conf->{keephugepages};
5920 };
5921 eval { PVE::QemuServer::Memory::hugepages_update_locked($code); };
5922
5923 } else {
5924 eval { $run_qemu->() };
5925 }
5926
5927 if (my $err = $@) {
5928 # deactivate volumes if start fails
5929 eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
5930 warn $@ if $@;
5931 eval { cleanup_pci_devices($vmid, $conf) };
5932 warn $@ if $@;
5933
5934 die "start failed: $err";
5935 }
5936
5937 # re-reserve all PCI IDs now that we can know the actual VM PID
5938 my $pid = PVE::QemuServer::Helpers::vm_running_locally($vmid);
5939 eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, undef, $pid) };
5940 warn $@ if $@;
5941
5942 if (defined($res->{migrate})) {
5943 print "migration listens on $res->{migrate}->{uri}\n";
5944 } elsif ($statefile) {
5945 eval { mon_cmd($vmid, "cont"); };
5946 warn $@ if $@;
5947 }
5948
5949 #start nbd server for storage migration
5950 if (my $nbd = $migrate_opts->{nbd}) {
5951 my $nbd_protocol_version = $migrate_opts->{nbd_proto_version} // 0;
5952
5953 my $migrate_storage_uri;
5954 # nbd_protocol_version > 0 for unix socket support
5955 if ($nbd_protocol_version > 0 && ($migration_type eq 'secure' || $migration_type eq 'websocket')) {
5956 my $socket_path = "/run/qemu-server/$vmid\_nbd.migrate";
5957 mon_cmd($vmid, "nbd-server-start", addr => { type => 'unix', data => { path => $socket_path } } );
5958 $migrate_storage_uri = "nbd:unix:$socket_path";
5959 $res->{migrate}->{unix_sockets} = [$socket_path];
5960 } else {
5961 my $nodename = nodename();
5962 my $localip = $get_migration_ip->($nodename);
5963 my $pfamily = PVE::Tools::get_host_address_family($nodename);
5964 my $storage_migrate_port = PVE::Tools::next_migrate_port($pfamily);
5965
5966 mon_cmd($vmid, "nbd-server-start", addr => {
5967 type => 'inet',
5968 data => {
5969 host => "${localip}",
5970 port => "${storage_migrate_port}",
5971 },
5972 });
5973 $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
5974 $migrate_storage_uri = "nbd:${localip}:${storage_migrate_port}";
5975 }
5976
5977 my $block_info = mon_cmd($vmid, "query-block");
5978 $block_info = { map { $_->{device} => $_ } $block_info->@* };
5979
5980 foreach my $opt (sort keys %$nbd) {
5981 my $drivestr = $nbd->{$opt}->{drivestr};
5982 my $volid = $nbd->{$opt}->{volid};
5983
5984 my $block_node = $block_info->{"drive-$opt"}->{inserted}->{'node-name'};
5985
5986 mon_cmd(
5987 $vmid,
5988 "block-export-add",
5989 id => "drive-$opt",
5990 'node-name' => $block_node,
5991 writable => JSON::true,
5992 type => "nbd",
5993 name => "drive-$opt", # NBD export name
5994 );
5995
5996 my $nbd_uri = "$migrate_storage_uri:exportname=drive-$opt";
5997 print "storage migration listens on $nbd_uri volume:$drivestr\n";
5998 print "re-using replicated volume: $opt - $volid\n"
5999 if $nbd->{$opt}->{replicated};
6000
6001 $res->{drives}->{$opt} = $nbd->{$opt};
6002 $res->{drives}->{$opt}->{nbd_uri} = $nbd_uri;
6003 }
6004 }
6005
6006 if ($migratedfrom) {
6007 eval {
6008 set_migration_caps($vmid);
6009 };
6010 warn $@ if $@;
6011
6012 if ($spice_port) {
6013 print "spice listens on port $spice_port\n";
6014 $res->{spice_port} = $spice_port;
6015 if ($migrate_opts->{spice_ticket}) {
6016 mon_cmd($vmid, "set_password", protocol => 'spice', password =>
6017 $migrate_opts->{spice_ticket});
6018 mon_cmd($vmid, "expire_password", protocol => 'spice', time => "+30");
6019 }
6020 }
6021
6022 } else {
6023 mon_cmd($vmid, "balloon", value => $conf->{balloon}*1024*1024)
6024 if !$statefile && $conf->{balloon};
6025
6026 foreach my $opt (keys %$conf) {
6027 next if $opt !~ m/^net\d+$/;
6028 my $nicconf = parse_net($conf->{$opt});
6029 qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down};
6030 }
6031 add_nets_bridge_fdb($conf, $vmid);
6032 }
6033
6034 if (!defined($conf->{balloon}) || $conf->{balloon}) {
6035 mon_cmd(
6036 $vmid,
6037 'qom-set',
6038 path => "machine/peripheral/balloon0",
6039 property => "guest-stats-polling-interval",
6040 value => 2
6041 );
6042 }
6043
6044 if ($resume) {
6045 print "Resumed VM, removing state\n";
6046 if (my $vmstate = $conf->{vmstate}) {
6047 PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
6048 PVE::Storage::vdisk_free($storecfg, $vmstate);
6049 }
6050 delete $conf->@{qw(lock vmstate runningmachine runningcpu)};
6051 PVE::QemuConfig->write_config($vmid, $conf);
6052 }
6053
6054 PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-start');
6055
6056 return $res;
6057 }
6058
# Build the QEMU command line for a VM and return it as one string.
#
# $storecfg: storage configuration, handed through to config_to_command()
# $vmid:     ID of the VM whose configuration gets loaded
# $snapname: optional; when set, the named snapshot section is used instead of the
#            current configuration
#
# Dies if $snapname is given but the configuration has no such snapshot.
sub vm_commandline {
    my ($storecfg, $vmid, $snapname) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    my $forcemachine;
    my $forcecpu;

    if ($snapname) {
        my $snap_conf = $conf->{snapshots}->{$snapname}
            // die "snapshot '$snapname' does not exist\n";

        # the snapshot may carry its own machine/CPU values, pass them on as overrides
        ($forcemachine, $forcecpu) = $snap_conf->@{qw(runningmachine runningcpu)};

        # keep file digest for API
        $snap_conf->{digest} = $conf->{digest};

        $conf = $snap_conf;
    }

    my $defaults = load_defaults();
    my $cmd = config_to_command($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu);

    return PVE::Tools::cmd2string($cmd);
}
6084
# Send a 'system_reset' monitor command to the VM while holding its config lock.
#
# $vmid:     ID of the VM
# $skiplock: when true, the config 'lock' check is skipped
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $do_reset = sub {
        my $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->check_lock($conf) unless $skiplock;

        mon_cmd($vmid, "system_reset");
    };

    PVE::QemuConfig->lock_config($vmid, $do_reset);
}
6097
# Collect the volume IDs referenced by a VM configuration.
#
# $conf: VM configuration hash, iterated via foreach_volid()
#
# Returns an array reference of volume IDs. Entries given as absolute path and entries
# for which PVE::Storage::parse_volume_id() yields no storage ID are left out.
sub get_vm_volumes {
    my ($conf) = @_;

    my @volids;

    foreach_volid($conf, sub {
        my ($volid) = @_;

        # skip entries given as absolute path
        return if $volid =~ m|^/|;

        # second argument presumably suppresses errors for unparsable IDs - confirm
        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        push @volids, $volid;
    });

    return \@volids;
}
6115
# Remove mediated devices created for the VM's hostpciN entries and drop the VM's PCI
# reservation.
#
# $vmid: ID of the VM
# $conf: VM configuration hash; only 'hostpci<N>' keys are looked at
sub cleanup_pci_devices {
    my ($vmid, $conf) = @_;

    for my $opt (keys $conf->%*) {
        next if $opt !~ m/^hostpci(\d+)$/;
        my $index = $1;

        my $mdev_uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $index);
        my $hostpci = parse_hostpci($conf->{$opt});
        next if !$hostpci->{mdev};

        # NOTE: avoid PVE::SysFSTools::pci_cleanup_mdev_device as it requires PCI ID and we
        # don't want to break ABI just for this two liner
        my $sysfs_dir = "/sys/bus/mdev/devices/$mdev_uuid";
        PVE::SysFSTools::file_write("$sysfs_dir/remove", "1") if -e $sysfs_dir;
    }

    PVE::QemuServer::PCI::remove_pci_reservation($vmid);
}
6133
# Best-effort cleanup after a VM has stopped. Any error raised by a cleanup step is
# turned into a warning instead of being propagated.
#
# $storecfg:              storage configuration
# $vmid:                  ID of the VM
# $conf:                  VM configuration hash
# $keepActive:            when true, volumes are neither deactivated nor is the TPM state unmapped
# $apply_pending_changes: when true, vmconfig_apply_pending() is called at the end
sub vm_stop_cleanup {
    my ($storecfg, $vmid, $conf, $keepActive, $apply_pending_changes) = @_;

    my $cleanup = sub {
        unless ($keepActive) {
            my $vollist = get_vm_volumes($conf);
            PVE::Storage::deactivate_volumes($storecfg, $vollist);

            my $tpmdrive = $conf->{tpmstate0};
            if ($tpmdrive) {
                my $tpm = parse_drive("tpmstate0", $tpmdrive);
                my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
                PVE::Storage::unmap_volume($storecfg, $tpm->{file}) if $storeid;
            }
        }

        # remove the per-VM runtime files
        for my $ext (qw(mon qmp pid vnc qga)) {
            unlink "/var/run/qemu-server/${vmid}.$ext";
        }

        if (my $ivshmem_conf = $conf->{ivshmem}) {
            my $ivshmem = parse_property_string($ivshmem_fmt, $ivshmem_conf);
            # Just delete it for now. VMs which already have it open are not affected, but
            # new VMs will get a separate one. If this becomes an issue we either add some
            # sort of ref-counting or a "don't delete on stop" flag to the ivshmem format.
            my $shm_name = $ivshmem->{name} // $vmid;
            unlink "/dev/shm/pve-shm-$shm_name";
        }

        cleanup_pci_devices($vmid, $conf);

        vmconfig_apply_pending($vmid, $conf, $storecfg) if $apply_pending_changes;
    };

    eval { $cleanup->() };
    warn $@ if $@; # avoid errors - just warn
}
6171
# call only in locked context
#
# Stop or shut down a running VM, escalating to signals when requested.
#
# $storecfg:   storage configuration, used for the cleanup afterwards
# $vmid:       ID of the VM
# $skiplock:   when true, the config 'lock' check is skipped
# $nocheck:    when true, the VM config is not loaded (no lock check, no hookscript, no
#              cleanup) and the flag is passed on to check_running()
# $timeout:    seconds to wait for the VM to stop; if undefined, the 'down' value of the
#              'startup' option is used for shutdowns, else 60
# $shutdown:   when true, request a guest shutdown/powerdown instead of 'quit'
# $force:      when true, send SIGTERM if the request failed or timed out
# $keepActive: passed on to vm_stop_cleanup()
#
# Returns nothing if the VM is not running. Dies if the stop request fails or times out
# and $force is not set.
sub _do_vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive) = @_;

    my $pid = check_running($vmid, $nocheck);
    return if !$pid;

    my $conf;
    if (!$nocheck) {
        $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->check_lock($conf) if !$skiplock;
        if (!defined($timeout) && $shutdown && $conf->{startup}) {
            my $opts = PVE::JSONSchema::pve_parse_startup_order($conf->{startup});
            $timeout = $opts->{down} if $opts->{down};
        }
        PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-stop');
    }

    # Polls once per second while the VM is running; returns true if $limit polls passed
    # without the VM going away.
    my $wait_timed_out = sub {
        my ($limit) = @_;

        my $elapsed = 0;
        while (($elapsed < $limit) && check_running($vmid, $nocheck)) {
            $elapsed++;
            sleep 1;
        }
        return $elapsed >= $limit;
    };

    eval {
        if (!$shutdown) {
            mon_cmd($vmid, "quit");
        } elsif (defined($conf) && get_qga_key($conf, 'enabled')) {
            mon_cmd($vmid, "guest-shutdown", timeout => $timeout);
        } else {
            mon_cmd($vmid, "system_powerdown");
        }
    };
    my $err = $@;

    if ($err) {
        if (!check_running($vmid, $nocheck)) {
            warn "Unexpected: VM shutdown command failed, but VM not running anymore..\n";
            return;
        }
        die "VM quit/powerdown failed\n" if !$force;

        warn "VM quit/powerdown failed - terminating now with SIGTERM\n";
        kill 15, $pid;
    } else {
        $timeout //= 60;

        if (!$wait_timed_out->($timeout)) {
            # VM went away in time
            vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
            return;
        }
        die "VM quit/powerdown failed - got timeout\n" if !$force;

        warn "VM still running - terminating now with SIGTERM\n";
        kill 15, $pid;
    }

    # wait again, this time for the SIGTERM to take effect
    if ($wait_timed_out->(10)) {
        warn "VM still running - terminating now with SIGKILL\n";
        kill 9, $pid;
        sleep 1;
    }

    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
}
6253
# Stop a VM.
#
# Note: use $nocheck to skip tests if VM configuration file exists.
# We need that when migrating VMs to other nodes (files already moved)
# Note: we set $keepActive in vzdump stop mode - volumes need to stay active
#
# If $migratedfrom is set, the process (if any) is sent SIGTERM, the config is loaded via
# load_config($vmid, $migratedfrom) and only the cleanup is run, without taking the config
# lock. Otherwise the work is delegated to _do_vm_stop() under the config lock.
sub vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive, $migratedfrom) = @_;

    # a plain stop defaults to forceful termination unless the caller decided otherwise
    $force //= 1 if !$shutdown;

    if ($migratedfrom) {
        if (my $pid = check_running($vmid, $nocheck, $migratedfrom)) {
            kill 15, $pid;
        }
        my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);
        vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 0);
        return;
    }

    my $locked_stop = sub {
        _do_vm_stop($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive);
    };
    PVE::QemuConfig->lock_config($vmid, $locked_stop);
}
6274
# Reboot a running VM: record a reboot request, then shut the VM down under the config lock.
#
# $vmid:    ID of the VM
# $timeout: shutdown timeout, passed on to _do_vm_stop()
#
# If anything fails, the reboot request is cleared again and the error is re-thrown.
sub vm_reboot {
    my ($vmid, $timeout) = @_;

    my $request_and_shutdown = sub {
        # only reboot if running, as qmeventd starts it again on a stop event
        return if !check_running($vmid);

        create_reboot_request($vmid);

        my $storecfg = PVE::Storage::config();
        _do_vm_stop($storecfg, $vmid, undef, undef, $timeout, 1);
    };

    PVE::QemuConfig->lock_config($vmid, sub {
        eval { $request_and_shutdown->() };
        if (my $err = $@) {
            # avoid that the next normal shutdown will be confused for a reboot
            clear_reboot_request($vmid);
            die $err;
        }
    });
}
6297
# Suspend a VM, either by pausing it or by saving its state to a volume.
#
# note: if using the statestorage parameter, the caller has to check privileges
#
# $vmid:         ID of the VM
# $skiplock:     when true, the config 'lock' check is skipped
# $includestate: when true, save the VM state to a state volume and quit QEMU afterwards;
#                otherwise only the 'stop' monitor command is issued
# $statestorage: optional storage for the state volume; when not given, one is picked via
#                find_vmstate_storage() and, outside the CLI, the user's
#                'Datastore.AllocateSpace' permission on it is checked
sub vm_suspend {
    my ($vmid, $skiplock, $includestate, $statestorage) = @_;

    # filled in by the first locked section, used by the state saving below
    my ($conf, $path, $storecfg, $vmstate);

    PVE::QemuConfig->lock_config($vmid, sub {
        $conf = PVE::QemuConfig->load_config($vmid);

        my $is_backing_up = PVE::QemuConfig->has_lock($conf, 'backup');
        PVE::QemuConfig->check_lock($conf) unless $skiplock || $is_backing_up;

        die "cannot suspend to disk during backup\n" if $is_backing_up && $includestate;

        if (!$includestate) {
            mon_cmd($vmid, "stop");
        } else {
            $conf->{lock} = 'suspending';
            my $date = strftime("%Y-%m-%d", localtime(time()));
            $storecfg = PVE::Storage::config();

            if (!$statestorage) {
                $statestorage = find_vmstate_storage($conf, $storecfg);
                # check permissions for the storage
                my $rpcenv = PVE::RPCEnvironment::get();
                if ($rpcenv->{type} ne 'cli') {
                    my $authuser = $rpcenv->get_user();
                    $rpcenv->check($authuser, "/storage/$statestorage", ['Datastore.AllocateSpace']);
                }
            }

            $vmstate = PVE::QemuConfig->__snapshot_save_vmstate(
                $vmid, $conf, "suspend-$date", $storecfg, $statestorage, 1);
            $path = PVE::Storage::path($storecfg, $vmstate);
            PVE::QemuConfig->write_config($vmid, $conf);
        }
    });

    return if !$includestate;

    # save vm state
    PVE::Storage::activate_volumes($storecfg, [$vmstate]);

    eval {
        set_migration_caps($vmid, 1);
        mon_cmd($vmid, "savevm-start", statefile => $path);

        # poll until the state saving either completed or failed
        while (1) {
            my $state = mon_cmd($vmid, "query-savevm");
            my $status = $state->{status};

            die "savevm not active\n" if !$status;

            if ($status eq 'active') {
                sleep(1);
                next;
            }
            if ($status eq 'completed') {
                print "State saved, quitting\n";
                last;
            }
            die "query-savevm failed with error '$state->{error}'\n"
                if $status eq 'failed' && $state->{error};
            die "query-savevm returned status '$status'\n";
        }
    };
    my $err = $@;

    PVE::QemuConfig->lock_config($vmid, sub {
        $conf = PVE::QemuConfig->load_config($vmid);

        if ($err) {
            # cleanup, but leave suspending lock, to indicate something went wrong
            eval {
                mon_cmd($vmid, "savevm-end");
                PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
                PVE::Storage::vdisk_free($storecfg, $vmstate);
                delete $conf->@{qw(vmstate runningmachine runningcpu)};
                PVE::QemuConfig->write_config($vmid, $conf);
            };
            warn $@ if $@;
            die $err;
        }

        die "lock changed unexpectedly\n"
            if !PVE::QemuConfig->has_lock($conf, 'suspending');

        mon_cmd($vmid, "quit");
        $conf->{lock} = 'suspended';
        PVE::QemuConfig->write_config($vmid, $conf);
    });
}
6392
# Resume a paused or suspended VM under its config lock.
#
# $nocheck is set when called as part of a migration - in this context the
# location of the config file (source or target node) is not deterministic,
# since migration cannot wait for pmxcfs to process the rename
#
# $vmid:     ID of the VM
# $skiplock: when true, the config 'lock' check is skipped
# $nocheck:  see above; also disables the lock check
sub vm_resume {
    my ($vmid, $skiplock, $nocheck) = @_;

    PVE::QemuConfig->lock_config($vmid, sub {
        my $res = mon_cmd($vmid, 'query-status');

        my $conf;
        if (!$nocheck) {
            $conf = PVE::QemuConfig->load_config($vmid);
        } else {
            $conf = eval { PVE::QemuConfig->load_config($vmid) }; # try on target node
            if ($@) {
                my $vmlist = PVE::Cluster::get_vmlist();
                if (exists($vmlist->{ids}->{$vmid})) {
                    my $node = $vmlist->{ids}->{$vmid}->{node};
                    $conf = eval { PVE::QemuConfig->load_config($vmid, $node) }; # try on source node
                }
                if (!$conf) {
                    PVE::Cluster::cfs_update(); # vmlist was wrong, invalidate cache
                    $conf = PVE::QemuConfig->load_config($vmid); # last try on target node again
                }
            }
        }

        # pick the monitor command matching the state QEMU reported
        my ($resume_cmd, $reset) = ('cont', 0);
        if (my $status = $res->{status}) {
            return if $status eq 'running'; # job done, go home
            $resume_cmd = 'system_wakeup' if $status eq 'suspended';
            $reset = 1 if $status eq 'shutdown';
        }

        if (!$nocheck && !$skiplock && !PVE::QemuConfig->has_lock($conf, 'backup')) {
            PVE::QemuConfig->check_lock($conf);
        }

        # required if a VM shuts down during a backup and we get a resume
        # request before the backup finishes for example
        mon_cmd($vmid, "system_reset") if $reset;

        add_nets_bridge_fdb($conf, $vmid) if $resume_cmd eq 'cont';

        mon_cmd($vmid, $resume_cmd);
    });
}
6443
# Send a key (combination) to the VM through the human monitor while holding the config lock.
#
# $vmid:     ID of the VM
# $skiplock: accepted for interface compatibility, not evaluated here
# $key:      key specification, inserted verbatim into the 'sendkey' monitor command
#
# Dies with the monitor output if the command returns anything but an empty string.
sub vm_sendkey {
    my ($vmid, $skiplock, $key) = @_;

    my $send = sub {
        # NOTE(review): the loaded config is not used afterwards; presumably the call is
        # kept so that a missing config aborts here - confirm
        my $conf = PVE::QemuConfig->load_config($vmid);

        # there is no qmp command, so we use the human monitor command
        my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "sendkey $key");
        die $output if $output ne '';
    };

    PVE::QemuConfig->lock_config($vmid, $send);
}
6456
# vzdump restore implementation
6458
# Return the name of the first member listed by 'tar tf' for the given archive.
#
# $archive: path to the tar archive; must be an existing regular file
#
# Dies if the file does not exist, if the tar process cannot be started, or if tar lists
# no member at all.
sub tar_archive_read_firstfile {
    my $archive = shift;

    die "ERROR: file '$archive' does not exist\n" if ! -f $archive;

    # try to detect archive type first
    # (list-form pipe open, the archive name is never interpreted by a shell)
    my $pid = open(my $fh, '-|', 'tar', 'tf', $archive)
        or die "unable to open file '$archive'\n";

    # only the first line is of interest, so stop tar right after reading it
    my $firstfile = <$fh>;
    kill 15, $pid;
    close $fh;

    die "ERROR: archive contains no data\n" if !$firstfile;
    chomp $firstfile;

    return $firstfile;
}
6476
# Remove the temporary volumes recorded in a restore stat file.
#
# $storecfg: storage configuration, used to free storage-managed volumes
# $statfile: path of the stat file; each line is expected to end in
#            'vzdump:<name>:<volid-or-absolute-path>'
#
# Progress and errors are reported on STDERR; a failure to remove one entry does not
# stop the processing of the remaining ones. If the stat file cannot be opened, nothing
# is cleaned up.
sub tar_restore_cleanup {
    my ($storecfg, $statfile) = @_;

    print STDERR "starting cleanup\n";

    if (my $fd = IO::File->new($statfile, "r")) {
        while (defined(my $line = <$fd>)) {
            if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                my $volid = $2;
                eval {
                    if ($volid =~ m|^/|) {
                        # 'or' (not '||') so the check applies to unlink's result and a
                        # failed removal is no longer reported as success
                        unlink($volid) or die "unlink failed - $!\n";
                    } else {
                        PVE::Storage::vdisk_free($storecfg, $volid);
                    }
                    print STDERR "temporary volume '$volid' sucessfuly removed\n";
                };
                print STDERR "unable to cleanup '$volid' - $@" if $@;
            } else {
                print STDERR "unable to parse line in statfile - $line";
            }
        }
        $fd->close();
    }
}
6502
# Dispatch a restore to the tar or VMA implementation depending on the archive format.
#
# $archive: archive path, or '-' (handed straight to restore_vma_archive(); presumably
#           meaning standard input - confirm)
# $vmid:    ID of the VM to restore into
# $user:    user on whose behalf the restore runs
# $opts:    restore options; $opts->{format} overrides the detected archive format
sub restore_file_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if ($archive eq '-') {
        return restore_vma_archive($archive, $vmid, $user, $opts);
    }

    # try to detect archive format
    my $info = PVE::Storage::archive_info($archive);
    my $format = $opts->{format} // $info->{format};

    return restore_tar_archive($archive, $vmid, $user, $opts) if $format eq 'tar';

    return restore_vma_archive($archive, $vmid, $user, $opts, $info->{compression});
}
6520
# Helper to remove disks that will not be used after restore
#
# $storecfg:     storage configuration
# $vmid:         ID of the VM; only volumes owned by this ID are touched
# $oldconf:      configuration the VM had before the restore
# $virtdev_hash: drives contained in the backup, keyed by drive name
#
# Failures to free a volume or delete a snapshot are only warned about.
my $restore_cleanup_oldconf = sub {
    my ($storecfg, $vmid, $oldconf, $virtdev_hash) = @_;

    my %kept_volids;

    PVE::QemuConfig->foreach_volume($oldconf, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        # Note: only delete disk we want to restore
        # other volumes will become unused
        if (!$virtdev_hash->{$ds}) {
            $kept_volids{$volid} = 1;
            return;
        }

        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        warn $@ if $@;
    });

    # after the restore we have no snapshots anymore
    for my $snapname (keys %{$oldconf->{snapshots}}) {
        my $snap = $oldconf->{snapshots}->{$snapname};

        if (my $state_volid = $snap->{vmstate}) {
            eval { PVE::Storage::vdisk_free($storecfg, $state_volid); };
            warn $@ if $@;
        }

        # kept disks lose this snapshot as well
        for my $volid (keys %kept_volids) {
            eval { PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snapname); };
            warn $@ if $@;
        }
    }
};
6566
# Helper to parse vzdump backup device hints
#
# $rpcenv: Environment, used to check storage permissions
# $user: User ID, to check storage permissions
# $storecfg: Storage configuration
# $fh: the file handle for reading the configuration
# $devinfo: should contain device sizes for all backed-up devices
# $options: backup options (pool, default storage)
#
# Return: $virtdev_hash, updates $devinfo (add devname, virtdev, format, storeid)
my $parse_backup_hints = sub {
    my ($rpcenv, $user, $storecfg, $fh, $devinfo, $options) = @_;

    # assert that an image can be allocated on the storage
    my $check_storage = sub {
        my ($storeid, $scfg) = @_;

        if (!$scfg->{content}->{images}) {
            die "Content type 'images' is not available on storage '$storeid'\n";
        }
        return if $user eq 'root@pam';

        $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace']);
    };

    my $virtdev_hash = {};

    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\#qmdump\#map:(\S+):(\S+):(\S*):(\S*):$/) {
            my ($virtdev, $devname, $storeid, $format) = ($1, $2, $3, $4);

            my $info = $devinfo->{$devname}
                or die "archive does not contain data for drive '$virtdev'\n";

            # an explicitly requested storage wins over the hint, 'local' is the fallback
            $storeid = $options->{storage} if defined($options->{storage});
            $storeid ||= 'local';
            $format ||= 'raw';

            $info->@{qw(devname virtdev format storeid)} = ($devname, $virtdev, $format, $storeid);

            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $check_storage->($storeid, $scfg); # permission and content type check

            $virtdev_hash->{$virtdev} = $info;
        } elsif ($line =~ m/^((?:ide|sata|scsi)\d+):\s*(.*)\s*$/) {
            my $virtdev = $1;
            my $drive = parse_drive($virtdev, $2);

            next if !drive_is_cloudinit($drive);

            my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
            $storeid = $options->{storage} if defined($options->{storage});
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            my $format = qemu_img_format($scfg, $volname); # has 'raw' fallback

            $check_storage->($storeid, $scfg); # permission and content type check

            $virtdev_hash->{$virtdev} = {
                format => $format,
                storeid => $storeid,
                size => PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE,
                is_cloudinit => 1,
            };
        }
    }

    return $virtdev_hash;
};
6634
# Helper to allocate and activate all volumes required for a restore
#
# $storecfg: Storage configuration
# $virtdev_hash: as returned by parse_backup_hints(); each entry gets its 'volid' set and
#                its 'format' replaced by the storage default if the storage does not
#                support the requested one
# $vmid: ID of the VM the volumes are allocated for
#
# Returns: { $virtdev => $volid }
my $restore_allocate_devices = sub {
    my ($storecfg, $virtdev_hash, $vmid) = @_;

    my $map = {};
    for my $virtdev (sort keys $virtdev_hash->%*) {
        my $d = $virtdev_hash->{$virtdev};
        # size is rounded up to full multiples of 1024 for the allocation call
        my $alloc_size = int(($d->{size} + 1024 - 1)/1024);
        my $storeid = $d->{storeid};
        # looked up once per device and reused for the cloud-init naming below
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        # test if requested format is supported
        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        my $supported = grep { $_ eq $d->{format} } @$validFormats;
        $d->{format} = $defFormat if !$supported;

        # only cloud-init disks get an explicit name, everything else is left undefined
        my $name;
        if ($d->{is_cloudinit}) {
            $name = "vm-$vmid-cloudinit";
            # storages with a 'path' setting get the format as file extension
            $name .= ".$d->{format}" if $scfg->{path};
        }

        my $volid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $vmid, $d->{format}, $name, $alloc_size);

        print STDERR "new volume ID is '$volid'\n";
        $d->{volid} = $volid;

        PVE::Storage::activate_volumes($storecfg, [$volid]);

        $map->{$virtdev} = $volid;
    }

    return $map;
};
6678
# Translate a single line of a backed-up VM configuration for the restored VM.
#
# $cookie: state hash shared between calls; 'netcount' is used and incremented when
#          old-style 'vlanN' lines are converted to 'netN' entries
# $map:    { $virtdev => $volid } of newly allocated volumes
# $line:   the configuration line, including its trailing newline
# $unique: when true, MAC addresses and the SMBIOS UUID are regenerated
#
# Returns the replacement text, which is the empty string for lines that are dropped.
sub restore_update_config_line {
    my ($cookie, $map, $line, $unique) = @_;

    # backup hints, locks, unused disks and snapshot parents are dropped
    return '' if $line =~ m/^(?:\#qmdump\#|\#vzdump\#|lock:|unused\d+:|parent:)/;

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');

    if ($line =~ m/^(vlan(\d+)):\s*(\S+)\s*$/) {
        # try to convert old 1.X settings
        my ($ind, $ethcfg) = ($2, $3);

        my $converted = '';
        for my $devconfig (PVE::Tools::split_list($ethcfg)) {
            my ($model, $macaddr) = split(/\=/, $devconfig);
            $macaddr = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if !$macaddr || $unique;

            my $netstr = print_net({
                model => $model,
                bridge => "vmbr$ind",
                macaddr => $macaddr,
            });

            $converted .= "net$cookie->{netcount}: $netstr\n";
            $cookie->{netcount}++;
        }
        return $converted;
    }

    if ($unique && $line =~ m/^(net\d+):\s*(\S+)\s*$/) {
        my ($id, $netstr) = ($1, $2);

        my $net = parse_net($netstr);
        $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if $net->{macaddr};
        $netstr = print_net($net);

        return "$id: $netstr\n";
    }

    if ($line =~ m/^((ide|scsi|virtio|sata|efidisk|tpmstate)\d+):\s*(\S+)\s*$/) {
        my ($virtdev, $value) = ($1, $3);
        my $di = parse_drive($virtdev, $value);

        # drives explicitly excluded from backup are commented out
        return "#$line" if defined($di->{backup}) && !$di->{backup};
        return $line if !$map->{$virtdev};

        delete $di->{format}; # format can change on restore
        $di->{file} = $map->{$virtdev};
        $value = print_drive($di);

        return "$virtdev: $value\n";
    }

    if ($line =~ m/^vmgenid: (.*)/) {
        my $vmgenid = $1;
        # always generate a new vmgenid if there was a valid one setup
        $vmgenid = generate_uuid() if $vmgenid ne '0';

        return "vmgenid: $vmgenid\n";
    }

    if ($unique && $line =~ m/^(smbios1: )(.*)/) {
        my ($prefix, $value) = ($1, $2);

        my ($uuid, $uuid_str);
        UUID::generate($uuid);
        UUID::unparse($uuid, $uuid_str);

        my $smbios1 = parse_smbios1($value);
        $smbios1->{uuid} = $uuid_str;

        return $prefix . print_smbios1($smbios1) . "\n";
    }

    # everything else is taken over unchanged
    return $line;
}
6747
# Deactivate all volumes that were allocated for a restore.
#
# $storecfg:     storage configuration
# $virtdev_hash: restore device hash; entries without a 'volid' are ignored
#
# Errors are printed to STDERR and not propagated.
my $restore_deactivate_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    my @volids = grep { $_ } map { $_->{volid} } values $virtdev_hash->%*;

    eval { PVE::Storage::deactivate_volumes($storecfg, \@volids); };
    print STDERR $@ if $@;
};
6759
# Free all volumes that were allocated for a restore.
#
# $storecfg:     storage configuration
# $virtdev_hash: restore device hash; entries without a 'volid' are skipped
#
# Each volume is handled on its own; the outcome is reported on STDERR and a failure
# does not stop the remaining volumes from being freed.
my $restore_destroy_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    my @volids = grep { $_ } map { $_->{volid} } values $virtdev_hash->%*;

    for my $volid (@volids) {
        eval {
            PVE::Storage::vdisk_free($storecfg, $volid);
            print STDERR "temporary volume '$volid' sucessfuly removed\n";
        };
        print STDERR "unable to cleanup '$volid' - $@" if $@;
    }
};
6772
# Parse the raw configuration from a backup and apply overrides on top of it.
#
# $filename:        file name handed to parse_vm_config()
# $backup_conf_raw: raw configuration text from the backup
# $override_conf:   hash of settings that replace the ones from the backup
#
# Returns the parsed configuration with every key of $override_conf overwritten.
my $restore_merge_config = sub {
    my ($filename, $backup_conf_raw, $override_conf) = @_;

    my $merged = parse_vm_config($filename, $backup_conf_raw);

    $merged->{$_} = $override_conf->{$_} for keys $override_conf->%*;

    return $merged;
};
6783
# List the 'images' volumes known to the storage layer, keyed by volume ID.
#
# $cfg:  storage configuration
# $vmid: VM ID handed to PVE::Storage::vdisk_list() as filter (may be undefined)
#
# Returns { $volid => $item }, where each item is the entry from vdisk_list() with an
# added 'path' key. Items lacking a volid or a (non-zero) size are skipped.
sub scan_volids {
    my ($cfg, $vmid) = @_;

    my $info = PVE::Storage::vdisk_list($cfg, undef, $vmid, undef, 'images');

    my %volid_hash;
    for my $storeid (keys $info->%*) {
        for my $item ($info->{$storeid}->@*) {
            next unless $item->{volid} && $item->{size};

            $item->{path} = PVE::Storage::path($cfg, $item->{volid});
            $volid_hash{$item->{volid}} = $item;
        }
    }

    return \%volid_hash;
}
6800
# Synchronize the disk entries of a VM configuration with the volumes found on storage.
#
# $vmid:       ID of the VM, used for messages and to recognize state volumes
# $conf:       VM configuration hash, modified in place
# $volid_hash: { $volid => $info } of existing volumes, each with 'size' and 'path'
#
# Three steps: refresh the size of configured disks, drop 'unusedN' entries whose volume
# is referenced elsewhere, and add volumes not referenced at all as new unused entries.
# Every change is printed. Returns a true value if $conf was changed, undef otherwise.
sub update_disk_config {
    my ($vmid, $conf, $volid_hash) = @_;

    my $changes;
    my $prefix = "VM $vmid";

    # Used and unused disks seen so far.
    # Note: it is allowed to define multiple storages with same path (alias), so
    # we need to check both 'volid' and real 'path' (two different volid can point
    # to the same path).
    my %seen_volid;
    my %seen_path;

    # update size info
    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;
        my $volume = $volid_hash->{$volid};

        # mark volid as "in-use" for next step
        $seen_volid{$volid} = 1;
        if ($volume && (my $path = $volume->{path})) {
            $seen_path{$path} = 1;
        }

        return if drive_is_cdrom($drive) || !$volume;

        my ($updated, $msg) = PVE::QemuServer::Drive::update_disksize($drive, $volume->{size});
        return if !defined($updated);

        $changes = 1;
        $conf->{$opt} = print_drive($updated);
        print "$prefix ($opt): $msg\n";
    });

    # remove 'unusedX' entry if volume is used
    PVE::QemuConfig->foreach_unused_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $entry = $volid_hash->{$volid};
        my $path = $entry ? $entry->{path} : undef;

        if ($seen_volid{$volid} || ($path && $seen_path{$path})) {
            print "$prefix remove entry '$opt', its volume '$volid' is in use\n";
            $changes = 1;
            delete $conf->{$opt};
        }

        $seen_volid{$volid} = 1;
        $seen_path{$path} = 1 if $path;
    });

    # add everything that is still unreferenced as unused volume
    for my $volid (sort keys $volid_hash->%*) {
        next if $volid =~ m/vm-$vmid-state-/;
        next if $seen_volid{$volid};

        my $path = $volid_hash->{$volid}->{path};
        next if !$path; # just to be sure
        next if $seen_path{$path};

        $changes = 1;
        my $key = PVE::QemuConfig->add_unused_volume($conf, $volid);
        print "$prefix add unreferenced volume '$volid' as '$key' to config\n";
        $seen_path{$path} = 1; # avoid to add more than once (aliases)
    }

    return $changes;
}
6874
# Rescan storage volumes and update the disk entries of one VM or of all VMs.
#
# $vmid:   ID of the VM to update; when undefined, every VM from config_list() is handled
# $nolock: when true, the configuration is updated without taking the config lock
# $dryrun: when true, detected changes are printed but the configuration is not written
sub rescan {
    my ($vmid, $nolock, $dryrun) = @_;

    my $cfg = PVE::Storage::config();

    print "rescan volumes...\n";
    my $volid_hash = scan_volids($cfg, $vmid);

    my $updatefn = sub {
        my ($vmid) = @_;

        my $conf = PVE::QemuConfig->load_config($vmid);

        PVE::QemuConfig->check_lock($conf);

        # restrict the scan result to volumes belonging to this VM
        my %vm_volids;
        for my $volid (keys $volid_hash->%*) {
            my $info = $volid_hash->{$volid};
            next unless $info->{vmid} && $info->{vmid} == $vmid;
            $vm_volids{$volid} = $info;
        }

        my $changes = update_disk_config($vmid, $conf, \%vm_volids);

        PVE::QemuConfig->write_config($vmid, $conf) if $changes && !$dryrun;
    };

    # run the update for one VM, with or without the config lock
    my $update_vm = sub {
        my ($id) = @_;

        if ($nolock) {
            $updatefn->($id);
        } else {
            PVE::QemuConfig->lock_config($id, $updatefn, $id);
        }
    };

    my @vmids;
    if (defined($vmid)) {
        @vmids = ($vmid);
    } else {
        my $vmlist = config_list();
        @vmids = keys %$vmlist;
    }

    for my $id (@vmids) {
        $update_vm->($id);
    }
}
6918
# Restore a VM from a Proxmox Backup Server snapshot.
#
# $archive - volume ID of the backup; must parse as vtype 'backup' with format 'pbs-vm'
# $vmid    - ID of the VM to (re-)create
# $user    - passed on to the backup-hint parser (together with the RPC environment)
# $options - hash ref; the keys read here are 'live', 'unique', 'override_conf' and 'pool',
#            the whole hash is also handed to the backup-hint parser
#
# The config and (optional) firewall config are fetched into a temporary directory, the
# target volumes are allocated and filled with 'pbs-restore', and finally the merged config
# is written. With $options->{live} only efidisk0/tpmstate0 are restored up front, the
# remaining disks are streamed by pbs_live_restore() while the VM is running.
#
# Dies on any error; volumes allocated so far are destroyed in that case.
sub restore_proxmox_backup_archive {
    my ($archive, $vmid, $user, $options) = @_;

    my $storecfg = PVE::Storage::config();

    my ($storeid) = PVE::Storage::parse_volume_id($archive);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

    my $fingerprint = $scfg->{fingerprint};
    my $keyfile = PVE::Storage::PBSPlugin::pbs_encryption_key_file_name($storecfg, $storeid);

    my $repo = PVE::PBSClient::get_repository($scfg);
    my $namespace = $scfg->{namespace};

    # This is only used for `pbs-restore` and the QEMU PBS driver (live-restore)
    my $password = PVE::Storage::PBSPlugin::pbs_get_password($scfg, $storeid);
    local $ENV{PBS_PASSWORD} = $password;
    local $ENV{PBS_FINGERPRINT} = $fingerprint if defined($fingerprint);

    my ($vtype, $pbs_backup_name, undef, undef, undef, undef, $format) =
        PVE::Storage::parse_volname($storecfg, $archive);

    die "got unexpected vtype '$vtype'\n" if $vtype ne 'backup';

    die "got unexpected backup format '$format'\n" if $format ne 'pbs-vm';

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;
    mkpath $tmpdir;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    # Note: $oldconf is undef if VM does not exists
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my $rpcenv = PVE::RPCEnvironment::get();
    my $devinfo = {}; # info about drives included in backup
    my $virtdev_hash = {}; # info about allocated drives

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        my $cfgfn = "$tmpdir/qemu-server.conf";
        my $firewall_config_fn = "$tmpdir/fw.conf";
        my $index_fn = "$tmpdir/index.json";

        my $cmd = "restore";

        my $param = [$pbs_backup_name, "index.json", $index_fn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);
        my $index = PVE::Tools::file_get_contents($index_fn);
        $index = decode_json($index);

        foreach my $info (@{$index->{files}}) {
            # the dots are literal parts of the archive name, so they must be escaped
            if ($info->{filename} =~ m/^(drive-\S+)\.img\.fidx$/) {
                my $devname = $1;
                if ($info->{size} =~ m/^(\d+)$/) { # untaint size
                    $devinfo->{$devname}->{size} = $1;
                } else {
                    die "unable to parse file size in 'index.json' - got '$info->{size}'\n";
                }
            }
        }

        my $is_qemu_server_backup = scalar(
            grep { $_->{filename} eq 'qemu-server.conf.blob' } @{$index->{files}}
        );
        if (!$is_qemu_server_backup) {
            die "backup does not look like a qemu-server backup (missing 'qemu-server.conf' file)\n";
        }
        my $has_firewall_config = scalar(grep { $_->{filename} eq 'fw.conf.blob' } @{$index->{files}});

        $param = [$pbs_backup_name, "qemu-server.conf", $cfgfn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

        if ($has_firewall_config) {
            $param = [$pbs_backup_name, "fw.conf", $firewall_config_fn];
            PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($firewall_config_fn, "${pve_firewall_dir}/$vmid.fw");
        }

        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $storecfg, $fh, $devinfo, $options);

        # fixme: rate limit?

        # create empty/temp config
        PVE::Tools::file_set_contents($conffile, "memory: 128\nlock: create");

        $restore_cleanup_oldconf->($storecfg, $vmid, $oldconf, $virtdev_hash) if $oldconf;

        # allocate volumes
        my $map = $restore_allocate_devices->($storecfg, $virtdev_hash, $vmid);

        # the namespace argument is the same for every volume
        my @ns_arg = defined($namespace) ? ('--ns', $namespace) : ();

        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            # this fails if storage is unavailable
            my $volid = $d->{volid};
            my $path = PVE::Storage::path($storecfg, $volid);

            # for live-restore we only want to preload the efidisk and TPM state
            next if $options->{live} && $virtdev ne 'efidisk0' && $virtdev ne 'tpmstate0';

            my $pbs_restore_cmd = [
                '/usr/bin/pbs-restore',
                '--repository', $repo,
                @ns_arg,
                $pbs_backup_name,
                "$d->{devname}.img.fidx",
                $path,
                '--verbose',
            ];

            push @$pbs_restore_cmd, '--format', $d->{format} if $d->{format};
            push @$pbs_restore_cmd, '--keyfile', $keyfile if -e $keyfile;

            if (PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $volid)) {
                push @$pbs_restore_cmd, '--skip-zero';
            }

            my $dbg_cmdstring = PVE::Tools::cmd2string($pbs_restore_cmd);
            print "restore proxmox backup image: $dbg_cmdstring\n";
            run_command($pbs_restore_cmd);
        }

        $fh->seek(0, 0) || die "seek failed - $!\n";

        # translate the backed-up config line by line (volume mapping, 'unique' handling)
        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $options->{unique},
            );
        }

        $fh->close();
    };
    my $err = $@;

    # for a successful live-restore the volumes stay active, the VM gets started on them
    if ($err || !$options->{live}) {
        $restore_deactivate_volumes->($storecfg, $virtdev_hash);
    }

    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($storecfg, $virtdev_hash);
        die $err;
    }

    if ($options->{live}) {
        # keep lock during live-restore
        $new_conf_raw .= "\nlock: create";
    }

    my $new_conf = $restore_merge_config->($conffile, $new_conf_raw, $options->{override_conf});
    PVE::QemuConfig->write_config($vmid, $new_conf);

    # best effort only, a failed rescan must not fail the restore
    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $options->{pool}) if $options->{pool};

    if ($options->{live}) {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "got signal ($!) - abort\n"; };

        my $conf = PVE::QemuConfig->load_config($vmid);
        die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf);

        # these special drives are already restored before start
        delete $devinfo->{'drive-efidisk0'};
        delete $devinfo->{'drive-tpmstate0-backup'};

        my $pbs_opts = {
            repo => $repo,
            keyfile => $keyfile,
            snapshot => $pbs_backup_name,
            namespace => $namespace,
        };
        pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $pbs_opts);

        PVE::QemuConfig->remove_lock($vmid, "create");
    }
}
7134
# Start a VM on top of a PBS snapshot and stream the disk contents into the
# already allocated target volumes while the guest is running.
#
# $vmid           - ID of the VM to start
# $conf           - VM config; used to look up the target volume of every restored drive
# $storecfg       - storage configuration
# $restored_disks - hash ref keyed by device name ('drive-<confname>') of the disks to stream
# $opts           - hash ref with 'repo', 'snapshot', 'keyfile' and optional 'namespace'
#
# On any error the VM is stopped again and the function dies with "live-restore failed".
sub pbs_live_restore {
    my ($vmid, $conf, $storecfg, $restored_disks, $opts) = @_;

    print "starting VM for live-restore\n";
    print "repository: '$opts->{repo}', snapshot: '$opts->{snapshot}'\n";

    my $pbs_backing = {};
    for my $ds (keys %$restored_disks) {
        # never continue with a stale capture from an earlier match
        my ($confname) = $ds =~ m/^drive-(.*)$/
            or die "unexpected device name '$ds' for live-restore\n";

        $pbs_backing->{$confname} = {
            repository => $opts->{repo},
            snapshot => $opts->{snapshot},
            archive => "$ds.img.fidx",
        };
        $pbs_backing->{$confname}->{keyfile} = $opts->{keyfile} if -e $opts->{keyfile};
        $pbs_backing->{$confname}->{namespace} = $opts->{namespace} if defined($opts->{namespace});

        my $drive = parse_drive($confname, $conf->{$confname});
        print "restoring '$ds' to '$drive->{file}'\n";
    }

    eval {
        # make sure HA doesn't interrupt our restore by stopping the VM
        if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
            run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
        }

        # start VM with backing chain pointing to PBS backup, environment vars for PBS driver
        # in QEMU (PBS_PASSWORD and PBS_FINGERPRINT) are already set by our caller
        vm_start_nolock($storecfg, $vmid, $conf, {paused => 1, 'pbs-backing' => $pbs_backing}, {});

        my $qmeventd_fd = register_qmeventd_handle($vmid);

        # begin streaming, i.e. data copy from PBS to target disk for every vol,
        # this will effectively collapse the backing image chain consisting of
        # [target <- alloc-track -> PBS snapshot] to just [target] (alloc-track
        # removes itself once all backing images vanish with 'auto-remove=on')
        my $jobs = {};
        for my $ds (sort keys %$restored_disks) {
            my $job_id = "restore-$ds";
            mon_cmd($vmid, 'block-stream',
                'job-id' => $job_id,
                device => "$ds",
            );
            $jobs->{$job_id} = {};
        }

        # the VM was started paused, let the guest run while the stream jobs are active
        mon_cmd($vmid, 'cont');
        qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');

        print "restore-drive jobs finished successfully, removing all tracking block devices"
            ." to disconnect from Proxmox Backup Server\n";

        for my $ds (sort keys %$restored_disks) {
            mon_cmd($vmid, 'blockdev-del', 'node-name' => "$ds-pbs");
        }

        close($qmeventd_fd);
    };

    my $err = $@;

    if ($err) {
        warn "An error occurred during live-restore: $err\n";
        _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
        die "live-restore failed\n";
    }
}
7205
# Restore a VM from a VMA archive by piping it through 'vma extract'.
#
# $archive - path of the archive, or '-' to read it from STDIN
# $vmid    - ID of the VM to (re-)create
# $user    - passed on to the backup-hint parser (together with the RPC environment)
# $opts    - hash ref; the keys read here are 'bwlimit', 'unique', 'override_conf' and
#            'pool', the whole hash is also handed to the backup-hint parser
# $comp    - optional compression identifier, looked up via decompressor_info('vma', ...)
#
# The device map (which archive device goes to which target path) is sent to the
# 'vma extract' process through a fifo once its output shows that the embedded
# config has been extracted.
#
# Dies on any error; volumes allocated so far are destroyed in that case.
7206 sub restore_vma_archive {
7207 my ($archive, $vmid, $user, $opts, $comp) = @_;
7208
7209 my $readfrom = $archive;
7210
7211 my $cfg = PVE::Storage::config();
7212 my $commands = [];
7213 my $bwlimit = $opts->{bwlimit};
7214
# Appends one stage to the command pipeline and to the debug string. After the
# first stage was added, every later stage reads from '-' instead of $archive.
7215 my $dbg_cmdstring = '';
7216 my $add_pipe = sub {
7217 my ($cmd) = @_;
7218 push @$commands, $cmd;
7219 $dbg_cmdstring .= ' | ' if length($dbg_cmdstring);
7220 $dbg_cmdstring .= PVE::Tools::cmd2string($cmd);
7221 $readfrom = '-';
7222 };
7223
7224 my $input = undef;
7225 if ($archive eq '-') {
7226 $input = '<&STDIN';
7227 } else {
7228 # If we use a backup from a PVE defined storage we also consider that
7229 # storage's rate limit:
7230 my (undef, $volid) = PVE::Storage::path_to_volume_id($cfg, $archive);
7231 if (defined($volid)) {
7232 my ($sid, undef) = PVE::Storage::parse_volume_id($volid);
7233 my $readlimit = PVE::Storage::get_bandwidth_limit('restore', [$sid], $bwlimit);
7234 if ($readlimit) {
7235 print STDERR "applying read rate limit: $readlimit\n";
# NOTE(review): the limit is multiplied by 1024 for cstream - presumably a
# KiB/s to bytes/s conversion, confirm against get_bandwidth_limit
7236 my $cstream = ['cstream', '-t', $readlimit*1024, '--', $readfrom];
7237 $add_pipe->($cstream);
7238 }
7239 }
7240 }
7241
7242 if ($comp) {
7243 my $info = PVE::Storage::decompressor_info('vma', $comp);
7244 my $cmd = $info->{decompressor};
# note: this appends to the array referenced by $info->{decompressor} itself
7245 push @$cmd, $readfrom;
7246 $add_pipe->($cmd);
7247 }
7248
# the directory is only removed here, not created; it is handed to
# 'vma extract' as its target directory below
7249 my $tmpdir = "/var/tmp/vzdumptmp$$";
7250 rmtree $tmpdir;
7251
7252 # disable interrupts (always do cleanups)
7253 local $SIG{INT} =
7254 local $SIG{TERM} =
7255 local $SIG{QUIT} =
7256 local $SIG{HUP} = sub { warn "got interrupt - ignored\n"; };
7257
# fifo used to send the device map to 'vma extract' (its '-r' argument); the
# write end is opened by $openfifo, which is passed to run_command as 'afterfork'
7258 my $mapfifo = "/var/tmp/vzdumptmp$$.fifo";
7259 POSIX::mkfifo($mapfifo, 0600);
7260 my $fifofh;
7261 my $openfifo = sub { open($fifofh, '>', $mapfifo) or die $! };
7262
7263 $add_pipe->(['vma', 'extract', '-v', '-r', $mapfifo, $readfrom, $tmpdir]);
7264
# $oldtimeout remembers the alarm that was pending before ours, so that it can
# be re-armed later; $timeout (seconds) covers the time until the config arrived
7265 my $oldtimeout;
7266 my $timeout = 5;
7267
7268 my $devinfo = {}; # info about drives included in backup
7269 my $virtdev_hash = {}; # info about allocated drives
7270
7271 my $rpcenv = PVE::RPCEnvironment::get();
7272
7273 my $conffile = PVE::QemuConfig->config_file($vmid);
7274
7275 # Note: $oldconf is undef if VM does not exist
7276 my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
7277 my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
7278 my $new_conf_raw = '';
7279
# per-storage write limit in bytes/s (0 means no limit), keyed by storage ID
7280 my %storage_limits;
7281
# Called from the output parser once the config is available in $tmpdir:
# evaluates the backup hints, allocates the target volumes, writes one mapping
# line per device to the fifo and builds the new config text in $new_conf_raw.
7282 my $print_devmap = sub {
7283 my $cfgfn = "$tmpdir/qemu-server.conf";
7284
7285 # we can read the config - that is already extracted
7286 my $fh = IO::File->new($cfgfn, "r") ||
7287 die "unable to read qemu-server.conf - $!\n";
7288
7289 my $fwcfgfn = "$tmpdir/qemu-server.fw";
7290 if (-f $fwcfgfn) {
7291 my $pve_firewall_dir = '/etc/pve/firewall';
7292 mkdir $pve_firewall_dir; # make sure the dir exists
7293 PVE::Tools::file_copy($fwcfgfn, "${pve_firewall_dir}/$vmid.fw");
7294 }
7295
7296 $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $cfg, $fh, $devinfo, $opts);
7297
7298 foreach my $info (values %{$virtdev_hash}) {
7299 my $storeid = $info->{storeid};
7300 next if defined($storage_limits{$storeid});
7301
7302 my $limit = PVE::Storage::get_bandwidth_limit('restore', [$storeid], $bwlimit) // 0;
7303 print STDERR "rate limit for storage $storeid: $limit KiB/s\n" if $limit;
7304 $storage_limits{$storeid} = $limit * 1024;
7305 }
7306
# every device announced by vma must have been matched to a virtual device
7307 foreach my $devname (keys %$devinfo) {
7308 die "found no device mapping information for device '$devname'\n"
7309 if !$devinfo->{$devname}->{virtdev};
7310 }
7311
7312 # create empty/temp config
7313 if ($oldconf) {
7314 PVE::Tools::file_set_contents($conffile, "memory: 128\n");
7315 $restore_cleanup_oldconf->($cfg, $vmid, $oldconf, $virtdev_hash);
7316 }
7317
7318 # allocate volumes
7319 my $map = $restore_allocate_devices->($cfg, $virtdev_hash, $vmid);
7320
7321 # print restore information to $fifofh
7322 foreach my $virtdev (sort keys %$virtdev_hash) {
7323 my $d = $virtdev_hash->{$virtdev};
7324 next if $d->{is_cloudinit}; # no need to restore cloudinit
7325
7326 my $storeid = $d->{storeid};
7327 my $volid = $d->{volid};
7328
7329 my $map_opts = '';
7330 if (my $limit = $storage_limits{$storeid}) {
7331 $map_opts .= "throttling.bps=$limit:throttling.group=$storeid:";
7332 }
7333
# zeros are written unless the volume reports the 'sparseinit' feature
7334 my $write_zeros = 1;
7335 if (PVE::Storage::volume_has_feature($cfg, 'sparseinit', $volid)) {
7336 $write_zeros = 0;
7337 }
7338
7339 my $path = PVE::Storage::path($cfg, $volid);
7340
7341 print $fifofh "${map_opts}format=$d->{format}:${write_zeros}:$d->{devname}=$path\n";
7342
7343 print "map '$d->{devname}' to '$path' (write zeros = ${write_zeros})\n";
7344 }
7345
7346 $fh->seek(0, 0) || die "seek failed - $!\n";
7347
7348 my $cookie = { netcount => 0 };
7349 while (defined(my $line = <$fh>)) {
7350 $new_conf_raw .= restore_update_config_line(
7351 $cookie,
7352 $map,
7353 $line,
7354 $opts->{unique},
7355 );
7356 }
7357
7358 $fh->close();
7359 };
7360
7361 eval {
7362 # enable interrupts
7363 local $SIG{INT} =
7364 local $SIG{TERM} =
7365 local $SIG{QUIT} =
7366 local $SIG{HUP} =
7367 local $SIG{PIPE} = sub { die "interrupted by signal\n"; };
7368 local $SIG{ALRM} = sub { die "got timeout\n"; };
7369
7370 $oldtimeout = alarm($timeout);
7371
# handles the output of the pipeline line by line (passed as 'outfunc')
7372 my $parser = sub {
7373 my $line = shift;
7374
7375 print "$line\n";
7376
7377 if ($line =~ m/^DEV:\sdev_id=(\d+)\ssize:\s(\d+)\sdevname:\s(\S+)$/) {
7378 my ($dev_id, $size, $devname) = ($1, $2, $3);
7379 $devinfo->{$devname} = { size => $size, dev_id => $dev_id };
7380 } elsif ($line =~ m/^CTIME: /) {
7381 # we correctly received the vma config, so we can disable
7382 # the timeout now for disk allocation (set to 10 minutes, so
7383 # that we always timeout if something goes wrong)
7384 alarm(600);
7385 &$print_devmap();
7386 print $fifofh "done\n";
# re-arm the alarm that was pending before (or disable it with 0); $oldtimeout
# is cleared so that the cleanup code below does not set it a second time
7387 my $tmp = $oldtimeout || 0;
7388 $oldtimeout = undef;
7389 alarm($tmp);
7390 close($fifofh);
7391 $fifofh = undef;
7392 }
7393 };
7394
7395 print "restore vma archive: $dbg_cmdstring\n";
7396 run_command($commands, input => $input, outfunc => $parser, afterfork => $openfifo);
7397 };
7398 my $err = $@;
7399
# only still set if the parser never reached the CTIME branch
7400 alarm($oldtimeout) if $oldtimeout;
7401
7402 $restore_deactivate_volumes->($cfg, $virtdev_hash);
7403
7404 close($fifofh) if $fifofh;
7405 unlink $mapfifo;
7406 rmtree $tmpdir;
7407
# on failure remove the volumes allocated so far, then re-throw the original error
7408 if ($err) {
7409 $restore_destroy_volumes->($cfg, $virtdev_hash);
7410 die $err;
7411 }
7412
7413 my $new_conf = $restore_merge_config->($conffile, $new_conf_raw, $opts->{override_conf});
7414 PVE::QemuConfig->write_config($vmid, $new_conf);
7415
# best effort only - a failing rescan is reported as a warning
7416 eval { rescan($vmid, 1); };
7417 warn $@ if $@;
7418
7419 PVE::AccessControl::add_vm_to_pool($vmid, $opts->{pool}) if $opts->{pool};
7420 }
7421
# Restore a VM from a (legacy) tar based vzdump archive.
#
# $archive - path of the tar archive, or '-' to read it from STDIN
# $vmid    - ID of the VM to (re-)create
# $user    - exported to the extraction helper as VZDUMP_USER
# $opts    - hash ref; the keys read here are 'override_conf' (must be empty), 'storage',
#            'pool', 'prealloc', 'info' and 'unique'
#
# The archive is unpacked with tar, every member is piped to the 'qmextract' helper
# (tar --to-command). The helper records the volume mapping in a stat file inside the
# temporary directory, which is then used to rewrite the config.
#
# Dies on any error; the temporary directory is removed in every case.
sub restore_tar_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if (scalar(keys $opts->{override_conf}->%*) > 0) {
        my $keystring = join(' ', keys $opts->{override_conf}->%*);
        die "cannot pass along options ($keystring) when restoring from tar archive\n";
    }

    if ($archive ne '-') {
        my $firstfile = tar_archive_read_firstfile($archive);
        die "ERROR: file '$archive' does not look like a QemuServer vzdump backup\n"
            if $firstfile ne 'qemu-server.conf';
    }

    my $storecfg = PVE::Storage::config();

    # avoid zombie disks when restoring over an existing VM -> cleanup first
    # pass keep_empty_config=1 to keep the config (thus VMID) reserved for us
    # skiplock=1 because qmrestore has set the 'create' lock itself already
    my $conffile = PVE::QemuConfig->config_file($vmid);
    destroy_vm($storecfg, $vmid, 1, { lock => 'restore' }) if -f $conffile;

    my $tocmd = "/usr/lib/qemu-server/qmextract";

    $tocmd .= " --storage " . PVE::Tools::shellquote($opts->{storage}) if $opts->{storage};
    $tocmd .= " --pool " . PVE::Tools::shellquote($opts->{pool}) if $opts->{pool};
    $tocmd .= ' --prealloc' if $opts->{prealloc};
    $tocmd .= ' --info' if $opts->{info};

    # tar option "xf" does not autodetect compression when read from STDIN,
    # so we pipe to zcat
    my $cmd = "zcat -f|tar xf " . PVE::Tools::shellquote($archive) . " " .
        PVE::Tools::shellquote("--to-command=$tocmd");

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    mkpath $tmpdir;

    # parameters for the extraction helper
    local $ENV{VZDUMP_TMPDIR} = $tmpdir;
    local $ENV{VZDUMP_VMID} = $vmid;
    local $ENV{VZDUMP_USER} = $user;

    my $new_conf_raw = '';

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        if ($archive eq '-') {
            print "extracting archive from STDIN\n";
            run_command($cmd, input => "<&STDIN");
        } else {
            print "extracting archive '$archive'\n";
            run_command($cmd);
        }

        # NOTE(review): this only leaves the eval block, the config file is still
        # written (empty) further down - confirm that this is intended for 'info'
        return if $opts->{info};

        # read new mapping
        my $map = {};
        my $statfile = "$tmpdir/qmrestore.stat";
        if (my $fd = IO::File->new($statfile, "r")) {
            while (defined (my $line = <$fd>)) {
                if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                    $map->{$1} = $2 if $1;
                } else {
                    print STDERR "unable to parse line in statfile - $line\n";
                }
            }
            $fd->close();
        }

        my $confsrc = "$tmpdir/qemu-server.conf";

        my $srcfd = IO::File->new($confsrc, "r") || die "unable to open file '$confsrc'\n";

        my $cookie = { netcount => 0 };
        while (defined (my $line = <$srcfd>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $srcfd->close();
    };
    if (my $err = $@) {
        tar_restore_cleanup($storecfg, "$tmpdir/qmrestore.stat") if !$opts->{info};
        # the cleanup above reads the stat file, so the directory can only be removed
        # afterwards - do not leave it behind on errors
        rmtree $tmpdir;
        die $err;
    }

    rmtree $tmpdir;

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    # best effort only, a failed rescan must not fail the restore
    eval { rescan($vmid, 1); };
    warn $@ if $@;
}
7534
# Call $func once for every storage ID referenced by a non-CDROM volume of
# $conf. The storage IDs are de-duplicated and visited in sorted order.
sub foreach_storage_used_by_vm {
    my ($conf, $func) = @_;

    my %used_storages;

    my $collect = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);

        # noerr=1: volumes that do not parse as volume ID are silently skipped
        my ($sid) = PVE::Storage::parse_volume_id($drive->{file}, 1);
        $used_storages{$sid} = $sid if $sid;
    };
    PVE::QemuConfig->foreach_volume($conf, $collect);

    for my $sid (sort keys %used_storages) {
        $func->($sid);
    }
}
7554
# storage types for which do_snapshots_with_qemu() answers true (unless 'krbd' is set)
my $qemu_snap_storage = {
    rbd => 1,
};

# Returns 1 if snapshots of $volid have to be done by QEMU, and nothing otherwise.
#
# That is the case for storages listed in $qemu_snap_storage that do not have
# 'krbd' set, and for volumes with a '.qcow2' or '.qed' extension. It is never
# the case for a device ID containing 'tpmstate0'.
#
# Dies if the storage of $volid is not part of $storecfg.
sub do_snapshots_with_qemu {
    my ($storecfg, $volid, $deviceid) = @_;

    return if $deviceid =~ m/tpmstate0/;

    my $storage_name = PVE::Storage::parse_volume_id($volid);
    my $scfg = $storecfg->{ids}->{$storage_name}
        // die "could not find storage '$storage_name'\n";

    my $storage_needs_qemu = $qemu_snap_storage->{$scfg->{type}} && !$scfg->{krbd};
    return 1 if $storage_needs_qemu;

    return 1 if $volid =~ m/\.(qcow2|qed)$/;

    return;
}
7577
# Ping the QEMU guest agent of $vmid (3 second timeout).
#
# Returns 1 if the agent answered and 0 otherwise; in the latter case a warning
# with the error is printed unless $nowarn is set.
sub qga_check_running {
    my ($vmid, $nowarn) = @_;

    eval { mon_cmd($vmid, "guest-ping", timeout => 3); };
    my $ping_err = $@;

    return 1 if !$ping_err;

    warn "QEMU Guest Agent is not running - $ping_err" if !$nowarn;
    return 0;
}
7588
# Convert the volumes of a VM into base volumes.
#
# $vmid - ID of the VM
# $conf - VM config; updated in place and written after every converted volume
# $disk - optional config key; when set, only that drive is converted
#
# CDROM drives and volumes without the 'template' storage feature are skipped.
sub template_create {
    my ($vmid, $conf, $disk) = @_;

    my $storecfg = PVE::Storage::config();

    my $convert_to_base = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive) || ($disk && $ds ne $disk);

        my $volid = $drive->{file};
        return if !PVE::Storage::volume_has_feature($storecfg, 'template', $volid);

        $drive->{file} = PVE::Storage::vdisk_create_base($storecfg, $volid);
        $conf->{$ds} = print_drive($drive);

        # persist immediately, so that the config always references the new volume name
        PVE::QemuConfig->write_config($vmid, $conf);
    };

    PVE::QemuConfig->foreach_volume($conf, $convert_to_base);
}
7609
# Translate an 'iscsi://<portal>/<target>/<lun>' path into the option string
# form (file.driver=iscsi,...) used together with qemu-img's --image-opts.
#
# Dies if $path does not have the expected format.
sub convert_iscsi_path {
    my ($path) = @_;

    my ($portal, $target, $lun) = $path =~ m|^iscsi://([^/]+)/([^/]+)/(.+)$|
        or die "cannot convert iscsi path '$path', unkown format\n";

    my $initiator_name = get_initiator_name();

    return join(',',
        "file.driver=iscsi",
        "file.transport=tcp",
        "file.initiator-name=$initiator_name",
        "file.portal=$portal",
        "file.target=$target",
        "file.lun=$lun",
        "driver=raw",
    );
}
7626
# Copy a disk image with 'qemu-img convert'.
#
# $src_volid           - source volume ID, or path of an existing file/block device
# $dst_volid           - destination volume ID (must already exist, '-n' is passed)
# $size                - size of the image, only used for the progress output
# $snapname            - optional snapshot of the source to copy from
# $is_zero_initialized - when true (and the target is not iSCSI), the target is
#                        opened through the 'zeroinit:' prefix
#
# Dies if source or destination cannot be resolved, or if the copy fails.
sub qemu_img_convert {
    my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized) = @_;

    my $storecfg = PVE::Storage::config();
    my ($src_storeid, $src_volname) = PVE::Storage::parse_volume_id($src_volid, 1);
    my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid, 1);

    die "destination '$dst_volid' is not a valid volid form qemu-img convert\n" if !$dst_storeid;

    # resolve the source, which is either a storage volume or a plain path
    my ($src_path, $src_format, $src_cachemode);
    my $src_is_iscsi = 0;

    if ($src_storeid) {
        PVE::Storage::activate_volumes($storecfg, [$src_volid], $snapname);
        my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
        $src_format = qemu_img_format($src_scfg, $src_volname);
        $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
        $src_is_iscsi = ($src_path =~ m|^iscsi://|);
        $src_cachemode = 'none' if $src_scfg->{type} eq 'zfspool';
    } elsif (-f $src_volid || -b $src_volid) {
        $src_path = $src_volid;
        ($src_format) = $src_path =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/;
    }

    die "source '$src_volid' is not a valid volid nor path for qemu-img convert\n" if !$src_path;

    # resolve the destination
    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
    my $dst_format = qemu_img_format($dst_scfg, $dst_volname);
    my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
    my $dst_is_iscsi = ($dst_path =~ m|^iscsi://|);

    # assemble the command line: general options first, then source/target format
    my @cmd = ('/usr/bin/qemu-img', 'convert', '-p', '-n');

    my $use_internal_snapshot = $snapname && $src_format && $src_format eq "qcow2";
    push @cmd, '-l', "snapshot.name=$snapname" if $use_internal_snapshot;
    push @cmd, '-t', 'none' if $dst_scfg->{type} eq 'zfspool';
    push @cmd, '-T', $src_cachemode if defined($src_cachemode);

    if ($src_is_iscsi) {
        push @cmd, '--image-opts';
        $src_path = convert_iscsi_path($src_path);
    } elsif ($src_format) {
        push @cmd, '-f', $src_format;
    }

    if ($dst_is_iscsi) {
        push @cmd, '--target-image-opts';
        $dst_path = convert_iscsi_path($dst_path);
    } else {
        push @cmd, '-O', $dst_format;
    }

    my $use_zeroinit = !$dst_is_iscsi && $is_zero_initialized;
    push @cmd, $src_path, ($use_zeroinit ? "zeroinit:$dst_path" : $dst_path);

    # turns the percentage printed by qemu-img ('-p') into a transferred/total line
    my $progress_parser = sub {
        my ($line) = @_;

        return if $line !~ m/\((\S+)\/100\%\)/;

        my $percent = $1;
        my $transferred_h = render_bytes(int($size * $percent / 100), 1);
        my $total_h = render_bytes($size, 1);

        print "transferred $transferred_h of $total_h ($percent%)\n";
    };

    eval { run_command(\@cmd, timeout => undef, outfunc => $progress_parser); };
    my $err = $@;
    die "copy failed: $err" if $err;
}
7708
# Determine the image format of a volume for QEMU.
#
# The file extension of $volname is only consulted when the storage config
# $scfg has a 'path' set; in every other case (or if the extension is not a
# known format) "raw" is returned.
sub qemu_img_format {
    my ($scfg, $volname) = @_;

    return "raw" if !$scfg->{path};

    my ($format) = $volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/;

    return $format // "raw";
}
7718
# Start a 'drive-mirror' block job for one drive of a running VM and monitor it.
#
# $vmid                - ID of the source VM
# $drive               - config key of the drive; the QEMU device is "drive-$drive"
# $dst_volid           - target volume ID, or an 'nbd:' URI
# $vmiddst             - ID of the target VM, handed to qemu_drive_mirror_monitor()
# $is_zero_initialized - open the target via 'zeroinit:' (not used for NBD targets)
# $jobs                - hash ref of already running jobs; the new job is added to it
# $completion, $qga    - handed to qemu_drive_mirror_monitor()
# $bwlimit             - optional bandwidth limit, multiplied by 1024 for QEMU's 'speed'
# $src_bitmap          - optional dirty bitmap; switches to an incremental sync
#
# If the job cannot be started, all jobs in $jobs are cancelled and the function dies.
sub qemu_drive_mirror {
    my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit, $src_bitmap) = @_;

    $jobs ||= {};

    my $device = "drive-$drive";
    $jobs->{$device} = {};

    my ($target, $target_format);
    if ($dst_volid =~ /^nbd:/) {
        ($target, $target_format) = ($dst_volid, "nbd");
    } else {
        my $storecfg = PVE::Storage::config();
        my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid);

        my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

        $target_format = qemu_img_format($dst_scfg, $dst_volname);

        my $dst_path = PVE::Storage::path($storecfg, $dst_volid);

        $target = $dst_path;
        $target = "zeroinit:$dst_path" if $is_zero_initialized;
    }

    my %mirror_args = (
        timeout => 10,
        device => $device,
        mode => "existing",
        sync => "full",
        target => $target,
    );
    $mirror_args{format} = $target_format if $target_format;

    if (defined($src_bitmap)) {
        @mirror_args{qw(sync bitmap)} = ('incremental', $src_bitmap);
        print "drive mirror re-using dirty bitmap '$src_bitmap'\n";
    }

    if (defined($bwlimit)) {
        $mirror_args{speed} = $bwlimit * 1024;
        print "drive mirror is starting for $device with bandwidth limit: ${bwlimit} KB/s\n";
    } else {
        print "drive mirror is starting for $device\n";
    }

    # if a job already runs for this device we get an error, catch it for cleanup
    eval { mon_cmd($vmid, "drive-mirror", %mirror_args); };
    if (my $err = $@) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@;
        die "mirroring error: $err\n";
    }

    qemu_drive_mirror_monitor($vmid, $vmiddst, $jobs, $completion, $qga);
}
7770
# Poll the block jobs in $jobs (hash ref keyed by job/device ID) once per second, print
# their progress and finish them according to $completion.
#
# $completion can be either
# 'complete': wait until all jobs are ready, block-job-complete them (default)
# 'cancel': wait until all jobs are ready, block-job-cancel them
# 'skip': wait until all jobs are ready, return with block jobs in ready state
# 'auto': wait until all jobs disappear, only use for jobs which complete automatically
#
# $vmiddst - ID of the target VM; if it is set and differs from $vmid, the jobs are
#            cancelled (not completed) once they are ready, with the guest file systems
#            frozen (when $qga is set and the agent runs) or the VM suspended meanwhile
# $op      - block job type to look at, defaults to 'mirror'
#
# Finished jobs are removed from $jobs. On any error the remaining jobs are cancelled
# and the function dies.
sub qemu_drive_mirror_monitor {
    my ($vmid, $vmiddst, $jobs, $completion, $qga, $op) = @_;

    $completion //= 'complete';
    $op //= "mirror";

    eval {
        # number of failed completion attempts ("cannot be completed")
        my $err_complete = 0;

        my $starttime = time();
        while (1) {
            die "block job ('$op') timed out\n" if $err_complete > 300;

            my $stats = mon_cmd($vmid, "query-block-jobs");
            my $ctime = time();

            my $running_jobs = {};
            for my $stat (@$stats) {
                next if $stat->{type} ne $op;
                $running_jobs->{$stat->{device}} = $stat;
            }

            my $readycounter = 0;

            for my $job_id (sort keys %$jobs) {
                my $job = $running_jobs->{$job_id};

                # a job that vanished is only fine if we completed it ourselves, or if it
                # is expected to finish on its own ('auto')
                my $vanished = !defined($job);
                my $complete = defined($jobs->{$job_id}->{complete}) && $vanished;
                if ($complete || ($vanished && $completion eq 'auto')) {
                    print "$job_id: $op-job finished\n";
                    delete $jobs->{$job_id};
                    next;
                }

                die "$job_id: '$op' has been cancelled\n" if !defined($job);

                my $busy = $job->{busy};
                my $ready = $job->{ready};
                if (my $total = $job->{len}) {
                    my $transferred = $job->{offset} || 0;
                    my $percent = sprintf "%.2f", ($transferred * 100 / $total);

                    my $duration = $ctime - $starttime;
                    my $total_h = render_bytes($total, 1);
                    my $transferred_h = render_bytes($transferred, 1);

                    my $status = sprintf(
                        "transferred $transferred_h of $total_h ($percent%%) in %s",
                        render_duration($duration),
                    );

                    if ($ready) {
                        if ($busy) {
                            $status .= ", still busy"; # shouldn't even happen? but mirror is weird
                        } else {
                            $status .= ", ready";
                        }
                    }
                    # stop printing progress once the job was seen ready
                    print "$job_id: $status\n" if !$jobs->{$job_id}->{ready};
                    $jobs->{$job_id}->{ready} = $ready;
                }

                $readycounter++ if $job->{ready};
            }

            last if scalar(keys %$jobs) == 0;

            if ($readycounter == scalar(keys %$jobs)) {
                print "all '$op' jobs are ready\n";

                # do the complete later (or has already been done)
                last if $completion eq 'skip' || $completion eq 'auto';

                if ($vmiddst && $vmiddst != $vmid) {
                    my $agent_running = $qga && qga_check_running($vmid);
                    if ($agent_running) {
                        print "freeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-freeze"); };
                        warn $@ if $@;
                    } else {
                        print "suspend vm\n";
                        eval { PVE::QemuServer::vm_suspend($vmid, 1); };
                        warn $@ if $@;
                    }

                    # if we clone a disk for a new target vm, we don't switch the disk
                    PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs);

                    if ($agent_running) {
                        print "unfreeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-thaw"); };
                        warn $@ if $@;
                    } else {
                        print "resume vm\n";
                        eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
                        warn $@ if $@;
                    }

                    last;
                } else {

                    for my $job_id (sort keys %$jobs) {
                        # try to switch the disk if source and destination are on the same guest
                        print "$job_id: Completing block job_id...\n";

                        my $completion_cmd;
                        if ($completion eq 'complete') {
                            $completion_cmd = 'block-job-complete';
                        } elsif ($completion eq 'cancel') {
                            $completion_cmd = 'block-job-cancel';
                        } else {
                            die "invalid completion value: $completion\n";
                        }

                        eval { mon_cmd($vmid, $completion_cmd, device => $job_id) };
                        my $complete_err = $@;
                        if ($complete_err && $complete_err =~ m/cannot be completed/) {
                            print "$job_id: block job cannot be completed, trying again.\n";
                            $err_complete++;
                        } elsif ($complete_err) {
                            # any other failure must not be reported as success
                            die "$job_id: block job cannot be completed - $complete_err\n";
                        } else {
                            print "$job_id: Completed successfully.\n";
                            $jobs->{$job_id}->{complete} = 1;
                        }
                    }
                }
            }
            sleep 1;
        }
    };
    my $err = $@;

    if ($err) {
        # best effort cleanup, the original error is the one that gets reported
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        die "block job ($op) error: $err";
    }
}
7912
# Cancel all block jobs listed in $jobs on VM $vmid and wait until they are gone.
#
# $jobs is a hash reference keyed by block job device id. Every entry gets its 'cancel' flag
# set; an entry is removed from the hash once 'query-block-jobs' no longer reports that device,
# so the hash is empty when the loop ends. Errors of the 'block-job-cancel' command itself are
# ignored, errors of 'query-block-jobs' propagate to the caller.
sub qemu_blockjobs_cancel {
    my ($vmid, $jobs) = @_;

    for my $job_id (keys %$jobs) {
        print "$job_id: Cancelling block job\n";
        # best effort only - whether the job really vanished is checked by the polling below
        eval { mon_cmd($vmid, "block-job-cancel", device => $job_id); };
        $jobs->{$job_id}->{cancel} = 1;
    }

    while (1) {
        my $stats = mon_cmd($vmid, "query-block-jobs");

        # index the jobs QEMU still knows about by their device id
        my %still_running;
        for my $stat (@$stats) {
            $still_running{$stat->{device}} = $stat;
        }

        for my $job_id (keys %$jobs) {
            next if !defined($jobs->{$job_id}->{cancel});
            next if defined($still_running{$job_id});

            print "$job_id: Done.\n";
            delete $jobs->{$job_id};
        }

        last if !%$jobs;

        sleep 1;
    }
}
7943
# Clone the volume behind $source->{drive} into a new volume owned by $dest->{vmid}.
#
# Parameters:
#   $storecfg   - storage configuration passed through to the PVE::Storage calls
#   $source     - hash ref, keys used: vmid, running, drivename, drive, snapname
#   $dest       - hash ref, keys used: vmid, drivename, efisize, storage, format
#   $full       - true: allocate a new volume and copy the data; false: linked clone via
#                 PVE::Storage::vdisk_clone
#   $newvollist - array ref; the ID of every newly created volume is pushed onto it
#   $jobs, $completion, $qga, $bwlimit
#               - handed on to qemu_drive_mirror() / qemu_drive_mirror_monitor()
#
# A running VM without snapshot is copied with a drive mirror, the EFI disk with 'qemu-img dd'
# and everything else with qemu_img_convert(). Cloud-init drives only get a fresh volume, no
# data is copied for them.
#
# Returns a deep copy of $source->{drive} with 'file' pointing at the new volume, 'format'
# removed and 'size' set when the size of the new volume could be queried.
#
# Dies on invalid drive name combinations and when one of the called helpers fails.
sub clone_disk {
    my ($storecfg, $source, $dest, $full, $newvollist, $jobs, $completion, $qga, $bwlimit) = @_;

    my ($vmid, $running) = $source->@{qw(vmid running)};
    my ($src_drivename, $drive, $snapname) = $source->@{qw(drivename drive snapname)};

    my ($newvmid, $dst_drivename, $efisize) = $dest->@{qw(vmid drivename efisize)};
    my ($storage, $format) = $dest->@{qw(storage format)};

    # The drive names are treated as optional by the checks below, so normalize the destination
    # name once instead of comparing a possibly undefined value with 'eq' later on.
    my $dst_name = $dst_drivename // '';

    my $use_drive_mirror = $full && $running && $src_drivename && !$snapname;

    if ($src_drivename && $dst_drivename && $src_drivename ne $dst_drivename) {
        die "cloning from/to EFI disk requires EFI disk\n"
            if $src_drivename eq 'efidisk0' || $dst_drivename eq 'efidisk0';
        die "cloning from/to TPM state requires TPM state\n"
            if $src_drivename eq 'tpmstate0' || $dst_drivename eq 'tpmstate0';

        # This would lead to two device nodes in QEMU pointing to the same backing image!
        die "cannot change drive name when cloning disk from/to the same VM\n"
            if $use_drive_mirror && $vmid == $newvmid;
    }

    die "cannot move TPM state while VM is running\n"
        if $use_drive_mirror && $src_drivename eq 'tpmstate0';

    my $newvolid;

    print "create " . ($full ? 'full' : 'linked') . " clone of drive ";
    print "$src_drivename " if $src_drivename;
    print "($drive->{file})\n";

    if (!$full) {
        $newvolid = PVE::Storage::vdisk_clone($storecfg, $drive->{file}, $newvmid, $snapname);
        push @$newvollist, $newvolid;
    } else {
        my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        $storeid = $storage if $storage;

        my $dst_format = resolve_dst_disk_format($storecfg, $storeid, $volname, $format);

        my $is_cloudinit = drive_is_cloudinit($drive);

        my $name = undef;
        my $size = undef;
        if ($is_cloudinit) {
            $name = "vm-$newvmid-cloudinit";
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            if ($scfg->{path}) {
                $name .= ".$dst_format";
            }
            $snapname = undef;
            $size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE;
        } elsif ($dst_name eq 'efidisk0') {
            $size = $efisize or die "internal error - need to specify EFI disk size\n";
        } elsif ($dst_name eq 'tpmstate0') {
            $dst_format = 'raw';
            $size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
        } else {
            ($size) = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
        }
        $newvolid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $newvmid, $dst_format, $name, ($size/1024)
        );
        push @$newvollist, $newvolid;

        PVE::Storage::activate_volumes($storecfg, [$newvolid]);

        if ($is_cloudinit) {
            # No data is copied for cloud-init drives.
            # When cloning multiple disks (e.g. during clone_vm) it might be the last disk, if
            # this is the case, we have to complete any block-jobs still there from previous
            # drive-mirrors.
            if ((($completion // '') eq 'complete') && (scalar(keys %$jobs) > 0)) {
                qemu_drive_mirror_monitor($vmid, $newvmid, $jobs, $completion, $qga);
            }
        } else {
            my $sparseinit = PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $newvolid);
            if ($use_drive_mirror) {
                qemu_drive_mirror($vmid, $src_drivename, $newvolid, $newvmid, $sparseinit, $jobs,
                    $completion, $qga, $bwlimit);
            } elsif ($dst_name eq 'efidisk0') {
                # TODO: handle bwlimits
                # the relevant data on the efidisk may be smaller than the source
                # e.g. on RBD/ZFS, so we use dd to copy only the amount
                # that is given by the OVMF_VARS.fd
                my $src_path = PVE::Storage::path($storecfg, $drive->{file}, $snapname);
                my $dst_path = PVE::Storage::path($storecfg, $newvolid);

                my $src_format = (PVE::Storage::parse_volname($storecfg, $drive->{file}))[6];

                # better for Ceph if block size is not too small, see bug #3324
                my $bs = 1024*1024;

                my $cmd = ['qemu-img', 'dd', '-n', '-O', $dst_format];

                if ($src_format eq 'qcow2' && $snapname) {
                    die "cannot clone qcow2 EFI disk snapshot - requires QEMU >= 6.2\n"
                        if !min_version(kvm_user_version(), 6, 2);
                    push $cmd->@*, '-l', $snapname;
                }
                push $cmd->@*, "bs=$bs", "osize=$size", "if=$src_path", "of=$dst_path";
                run_command($cmd);
            } else {
                # TODO: handle bwlimits
                qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit);
            }
        }
    }

    # best effort - the resulting drive simply carries no size if the query fails
    my ($newsize) = eval { PVE::Storage::volume_size_info($storecfg, $newvolid, 10) };

    my $disk = dclone($drive);
    delete $disk->{format};
    $disk->{file} = $newvolid;
    $disk->{size} = $newsize if defined($newsize);

    return $disk;
}
8063
# Ask the running QEMU instance of VM $vmid for its version via 'query-version' and return it
# as a "<major>.<minor>" string.
sub get_running_qemu_version {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, "query-version");

    my $major = $res->{qemu}->{major};
    my $minor = $res->{qemu}->{minor};

    return "$major.$minor";
}
8069
# Decide whether the old (non-EFI) PXE/BIOS files have to be used for $machine_type.
#
# Returns nothing when no machine type is given, otherwise the list
# ($use_old_bios_files, $machine_type). A machine type with a '.pxe' suffix always selects the
# old files and is returned with that suffix stripped.
sub qemu_use_old_bios_files {
    my ($machine_type) = @_;

    return if !$machine_type;

    if ($machine_type =~ m/^(\S+)\.pxe$/) {
        return (1, $1);
    }

    # Note: kvm version < 2.4 use non-efi pxe files, and have problems when we
    # load new efi bios files on migration. So this hack is required to allow
    # live migration from qemu-2.2 to qemu-2.4, which is sometimes used when
    # upgrading from proxmox-ve-3.X to proxmox-ve 4.0
    my $version = extract_version($machine_type, kvm_user_version());

    return (!min_version($version, 2, 4), $machine_type);
}
8091
# Return the size in bytes (as reported by -s) of the OVMF VARS file matching the VM config.
#
# $efidisk is optional; when it is not given, it is parsed from $conf->{efidisk0} if that
# entry is set.
sub get_efivars_size {
    my ($conf, $efidisk) = @_;

    my $arch = get_vm_arch($conf);

    if (!defined($efidisk) && $conf->{efidisk0}) {
        $efidisk = parse_drive('efidisk0', $conf->{efidisk0});
    }

    my $is_q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # only the second element (path of the VARS file) is of interest here
    my @ovmf_files = get_ovmf_files($arch, $efidisk, $is_q35);

    return -s $ovmf_files[1];
}
8101
# Rewrite the 'size' property of $conf->{efidisk0} with the value from get_efivars_size().
# Does nothing when the config has no efidisk0 entry. Modifies $conf in place, returns nothing.
sub update_efidisk_size {
    my ($conf) = @_;

    if (defined($conf->{efidisk0})) {
        my $efidisk = PVE::QemuServer::parse_drive('efidisk0', $conf->{efidisk0});
        $efidisk->{size} = get_efivars_size($conf);
        $conf->{efidisk0} = print_drive($efidisk);
    }

    return;
}
8113
# Rewrite the 'size' property of $conf->{tpmstate0} with the fixed TPM state disk size.
#
# Does nothing (and returns nothing) when the config has no tpmstate0 entry, mirroring
# update_efidisk_size(); otherwise $conf is modified in place and the new property string is
# returned.
sub update_tpmstate_size {
    my ($conf) = @_;

    # without this guard an undefined value would be parsed and a tpmstate0 entry written anyway
    return if !defined($conf->{tpmstate0});

    my $disk = PVE::QemuServer::parse_drive('tpmstate0', $conf->{tpmstate0});
    $disk->{size} = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
    $conf->{tpmstate0} = print_drive($disk);

    return $conf->{tpmstate0};
}
8121
# Allocate a new EFI vars volume for $vmid on storage $storeid and fill it with the contents of
# the OVMF VARS template selected by get_ovmf_files($arch, $efidisk, $smm).
#
# Returns the list ($volid, $size), where $size is the size reported by
# PVE::Storage::volume_size_info() divided by 1024.
sub create_efidisk($$$$$$$) {
    my ($storecfg, $storeid, $vmid, $fmt, $arch, $efidisk, $smm) = @_;

    my $ovmf_vars = (get_ovmf_files($arch, $efidisk, $smm))[1];

    my $template_bytes = -s $ovmf_vars;
    my $template_kb = PVE::Tools::convert_size($template_bytes, 'b' => 'kb');

    my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $fmt, undef, $template_kb);
    PVE::Storage::activate_volumes($storecfg, [$volid]);

    qemu_img_convert($ovmf_vars, $volid, $template_bytes, undef, 0);

    my ($allocated) = PVE::Storage::volume_size_info($storecfg, $volid, 3);

    return ($volid, $allocated/1024);
}
8137
# Query the IO threads of VM $vmid via 'query-iothreads' and return a hash reference mapping
# each reported 'id' to its 'thread-id'.
sub vm_iothreads_list {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, 'query-iothreads');

    my %thread_id_of;
    for my $entry (@$res) {
        $thread_id_of{ $entry->{id} } = $entry->{"thread-id"};
    }

    return \%thread_id_of;
}
8150
# Compute SCSI controller placement for $drive based on $conf->{scsihw}.
#
# Returns the list ($maxdev, $controller, $controller_prefix):
#   $maxdev            - devices per controller: 7 when scsihw is unset or starts with 'lsi',
#                        1 for 'virtio-scsi-single', 256 otherwise
#   $controller        - int($drive->{index} / $maxdev)
#   $controller_prefix - "virtioscsi" for 'virtio-scsi-single', "scsihw" otherwise
sub scsihw_infos {
    my ($conf, $drive) = @_;

    my $scsihw = $conf->{scsihw};
    my $is_single = $scsihw && $scsihw eq 'virtio-scsi-single';

    my $maxdev;
    if (!$scsihw || $scsihw =~ m/^lsi/) {
        $maxdev = 7;
    } elsif ($is_single) {
        $maxdev = 1;
    } else {
        $maxdev = 256;
    }

    my $controller = int($drive->{index} / $maxdev);
    my $controller_prefix = $is_single ? "virtioscsi" : "scsihw";

    return ($maxdev, $controller, $controller_prefix);
}
8171
# Pick the image format for a new volume on storage $storeid.
#
# An explicitly requested $format wins; without one, the format derived from $src_volname via
# qemu_img_format() is used as a hint, and without a source volume name the storage default is
# returned right away. A format the storage does not list as valid is replaced by the default.
sub resolve_dst_disk_format {
    my ($storecfg, $storeid, $src_volname, $format) = @_;

    my ($default_format, $valid_formats) =
        PVE::Storage::storage_default_format($storecfg, $storeid);

    if (!$format) {
        return $default_format if !$src_volname;

        # no target format requested - use the source disk format as hint
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        $format = qemu_img_format($scfg, $src_volname);
    }

    # fall back to the default if the storage does not support the wanted format
    my $is_supported = grep { $_ eq $format } @$valid_formats;

    return $is_supported ? $format : $default_format;
}
8191
8192 # NOTE: if this logic changes, please update docs & possibly gui logic
# Select the storage to use for the VM state, in this order of preference:
#   1. $conf->{vmstatestorage}, if set
#   2. a shared storage used by the VM
#   3. a non-shared storage used by the VM
#   4. the storage named 'local'
# Within 2. and 3. a storage with a 'path' property (file based) replaces an earlier candidate.
sub find_vmstate_storage {
    my ($conf, $storecfg) = @_;

    my $configured = $conf->{vmstatestorage};
    return $configured if $configured;

    my %candidate; # one slot each for 'shared' and 'local' storages

    foreach_storage_used_by_vm($conf, sub {
        my ($sid) = @_;
        my $scfg = PVE::Storage::storage_config($storecfg, $sid);
        my $slot = $scfg->{shared} ? 'shared' : 'local';
        # prefer file based storage
        if (!$candidate{$slot} || $scfg->{path}) {
            $candidate{$slot} = $sid;
        }
    });

    return $candidate{shared} // $candidate{local} // 'local';
}
8215
# Generate a new UUID with the UUID module and return its string form as produced by
# UUID::unparse().
sub generate_uuid {
    # both UUID functions fill the variable passed as their last argument
    UUID::generate(my $binary);
    UUID::unparse($binary, my $text);

    return $text;
}
8222
# Return a freshly generated UUID as "uuid=<uuid>" string.
sub generate_smbios1_uuid {
    my $uuid = generate_uuid();
    return "uuid=$uuid";
}
8226
# Send 'nbd-server-stop' to the monitor of VM $vmid and return the result of that command.
sub nbd_stop {
    my ($vmid) = @_;

    return mon_cmd($vmid, 'nbd-server-stop');
}
8232
# Create the (empty) reboot trigger file /run/qemu-server/<vmid>.reboot, truncating an existing
# one. Dies if the file cannot be opened for writing.
sub create_reboot_request {
    my ($vmid) = @_;

    my $trigger_file = "/run/qemu-server/$vmid.reboot";

    open(my $fh, '>', $trigger_file)
        or die "failed to create reboot trigger file: $!\n";
    close($fh);
}
8239
# Remove the reboot trigger file /run/qemu-server/<vmid>.reboot.
#
# Returns the result of unlink(), i.e. a true value if the file was removed and a false one if
# it did not exist. Dies on any unlink error other than ENOENT.
sub clear_reboot_request {
    my ($vmid) = @_;

    my $removed = unlink("/run/qemu-server/$vmid.reboot");

    # a missing file just means that no reboot was requested
    if (!$removed && $! != POSIX::ENOENT) {
        die "could not remove reboot request for $vmid: $!";
    }

    return $removed;
}
8251
# Translate a legacy boot string (a sequence of letters, e.g. 'c', 'd', 'n') into a hash
# reference mapping device names to boot indices.
#
# The legacy string is taken from $bootcfg->{legacy}, falling back to the schema default. The
# n-th letter gets the base index n*100; devices of the same class are numbered upwards from
# there:
#   d - every CD-ROM drive
#   c - only the drive named by $conf->{bootdisk}; the counter still advances for every
#       non-CD-ROM drive
#   n - every configured net device
sub bootorder_from_legacy {
    my ($conf, $bootcfg) = @_;

    my $legacy = $bootcfg->{legacy} || $boot_fmt->{legacy}->{default};

    my %next_index;
    my $position = 0;
    for my $letter (split(//, $legacy)) {
        $position++;
        $next_index{$letter} = $position * 100;
    }

    my %bootorder;

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        if (drive_is_cdrom ($drive, 1)) {
            if ($next_index{d}) {
                $bootorder{$ds} = $next_index{d}++;
            }
        } elsif ($next_index{c}) {
            my $is_bootdisk = $conf->{bootdisk} && $conf->{bootdisk} eq $ds;
            $bootorder{$ds} = $next_index{c} if $is_bootdisk;
            $next_index{c} += 1;
        }
    });

    if ($next_index{n}) {
        for my $net_id (0 .. $MAX_NETS - 1) {
            my $netname = "net$net_id";
            next if !$conf->{$netname};
            $bootorder{$netname} = $next_index{n}++;
        }
    }

    return \%bootorder;
}
8291
8292 # Generate default device list for 'boot: order=' property. Matches legacy
8293 # default boot order, but with explicit device names. This is important, since
8294 # the fallback for when neither 'order' nor the old format is specified relies
8295 # on 'bootorder_from_legacy' above, and it would be confusing if this diverges.
# Returns an array reference with, in this order and only if present: the first hard disk, the
# first CD-ROM drive and the lowest-numbered configured net device.
sub get_default_bootdevices {
    my ($conf) = @_;

    my @devices;

    # harddisk first, then cdrom
    for my $want_cdrom (0, 1) {
        my $disk = PVE::QemuServer::Drive::resolve_first_disk($conf, $want_cdrom);
        push @devices, $disk if $disk;
    }

    # network
    my $netname = first { $conf->{$_} } map { "net$_" } 0 .. $MAX_NETS - 1;
    push @devices, $netname if defined($netname);

    return \@devices;
}
8319
# Build the hash reference mapping device names to boot indices for the VM config.
#
# Without a 'boot' property, or with one using the legacy format, the result comes from
# bootorder_from_legacy(). With 'order=' the listed devices are numbered consecutively
# starting at 100. Anything else yields an empty hash.
sub device_bootorder {
    my ($conf) = @_;

    return bootorder_from_legacy($conf) if !defined($conf->{boot});

    my $boot = parse_property_string($boot_fmt, $conf->{boot});

    return bootorder_from_legacy($conf, $boot) if !defined($boot) || $boot->{legacy};

    my %bootorder;
    if ($boot->{order}) {
        # start at 100 to allow user to insert devices before us with -args
        my $index = 100;
        for my $dev (PVE::Tools::split_list($boot->{order})) {
            $bootorder{$dev} = $index;
            $index++;
        }
    }

    return \%bootorder;
}
8339
# Connect to the qmeventd socket and send the vzdump handshake for VM $vmid.
#
# The connection is retried (with a 25ms pause) as long as the failure is EINTR or EAGAIN, and
# given up after the fifth failed attempt. Returns the open socket handle; the caller has to
# close it once the inhibit is no longer required. Dies if no connection can be established.
sub register_qmeventd_handle {
    my ($vmid) = @_;

    my $socket_path = "/var/run/qmeventd.sock";
    my $attempts = 0;
    my $handle;

    while (1) {
        $attempts++;
        $handle = IO::Socket::UNIX->new(Peer => $socket_path, Blocking => 0, Timeout => 1);
        last if $handle;

        # only transient errors are worth another attempt
        die "unable to connect to qmeventd socket (vmid: $vmid) - $!\n"
            if $! != EINTR && $! != EAGAIN;

        die "unable to connect to qmeventd socket (vmid: $vmid) - timeout after $attempts retries\n"
            if $attempts > 4;

        usleep(25000);
    }

    # send handshake to mark VM as backing up
    print {$handle} to_json({vzdump => {vmid => "$vmid"}});

    return $handle;
}
8367
8368 # bash completion helper
8369
# Completion helper: returns an array reference with the volume IDs of all backups whose
# format is 'vma.<compressor>'. If $cvalue starts with "<storeid>:", only that storage is
# listed.
sub complete_backup_archives {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    my $storeid;
    $storeid = $1 if $cvalue =~ m/^([^:]+):/;

    my $data = PVE::Storage::template_list($cfg, $storeid, 'backup');

    my @volids;
    for my $items (values %$data) {
        for my $item (@$items) {
            next if $item->{format} !~ m/^vma\.(${\PVE::Storage::Plugin::COMPRESSOR_RE})$/;
            next if !defined($item->{volid});
            push @volids, $item->{volid};
        }
    }

    return \@volids;
}
8393
# Completion helper: returns an array reference with VM IDs taken from vmstatus().
#
# $running undefined - all VMs (templates included)
# $running true      - only running VMs that are not templates
# $running false     - only non-running VMs that are not templates
my $complete_vmid_full = sub {
    my ($running) = @_;

    my $vmlist = vmstatus();

    my @vmids;
    for my $vmid (keys %$vmlist) {
        if (defined($running)) {
            my $info = $vmlist->{$vmid};
            next if $info->{template};

            my $is_running = $info->{status} eq 'running';
            next if $running ? !$is_running : $is_running;
        }
        push @vmids, $vmid;
    }

    return \@vmids;
};
8413
# Completion helper: IDs of all VMs, regardless of their state.
sub complete_vmid {
    return $complete_vmid_full->();
}
8417
# Completion helper: IDs of all non-template VMs that are not running.
sub complete_vmid_stopped {
    return $complete_vmid_full->(0);
}
8421
# Completion helper: IDs of all non-template VMs that are running.
sub complete_vmid_running {
    return $complete_vmid_full->(1);
}
8425
# Completion helper: returns an array reference with the IDs of all storages that pass
# PVE::Storage::storage_check_enabled() and have the 'images' content type configured.
sub complete_storage {
    my $cfg = PVE::Storage::config();
    my $storages = $cfg->{ids};

    my @storeids;
    for my $storeid (keys %$storages) {
        my $enabled = PVE::Storage::storage_check_enabled($cfg, $storeid, undef, 1);
        push @storeids, $storeid if $enabled && $storages->{$storeid}->{content}->{images};
    }

    return \@storeids;
}
8440
# Completion helper: like complete_storage(), but the enabled check is done for the target
# node, which is taken from the second element of $all_args.
sub complete_migration_storage {
    my ($cmd, $param, $current_value, $all_args) = @_;

    my ($targetnode) = @{$all_args}[1];

    my $cfg = PVE::Storage::config();
    my $storages = $cfg->{ids};

    my @storeids;
    for my $storeid (keys %$storages) {
        my $enabled = PVE::Storage::storage_check_enabled($cfg, $storeid, $targetnode, 1);
        push @storeids, $storeid if $enabled && $storages->{$storeid}->{content}->{images};
    }

    return \@storeids;
}
8458
# Check whether VM $vmid reports the status "paused" via 'query-status'.
#
# Errors (config not on this node, monitor failure) are only warned about; in that case the
# false result of the failed query is returned.
sub vm_is_paused {
    my ($vmid) = @_;

    my $status = eval {
        PVE::QemuConfig::assert_config_exists_on_node($vmid);
        mon_cmd($vmid, "query-status");
    };
    if (my $err = $@) {
        warn "$err\n";
    }

    return $status if !$status;
    return $status->{status} eq "paused";
}
8468
# Assert that the storage of volume $vol is configured for the content type of that volume.
# Returns 1 on success, dies otherwise.
sub check_volume_storage_type {
    my ($storecfg, $vol) = @_;

    my ($storeid) = PVE::Storage::parse_volume_id($vol);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
    my ($vtype) = PVE::Storage::parse_volname($storecfg, $vol);

    if (!$scfg->{content}->{$vtype}) {
        die "storage '$storeid' does not support content-type '$vtype'\n";
    }

    return 1;
}
8481
# Add the MAC address of every 'netN' device in $conf to the forwarding DB of its bridge,
# using tap interface name "tap<vmid>i<N>".
#
# Devices without a MAC address are skipped; a warning is logged for them if the 'learning'
# flag of the bridge port reads as false. With SDN available the SDN helper is used, otherwise
# only bridges with a /sys/class/net/<bridge>/bridge directory are handled.
sub add_nets_bridge_fdb {
    my ($conf, $vmid) = @_;

    for my $opt (keys %$conf) {
        next unless $opt =~ m/^net(\d+)$/;
        my $iface = "tap${vmid}i$1";

        # NOTE: expect setups with learning off to *not* use auto-random-generation of MAC on start
        my $net = parse_net($conf->{$opt}, 1);
        next if !$net;

        my $mac = $net->{macaddr};
        if (!$mac) {
            my $learning = file_read_firstline("/sys/class/net/$iface/brport/learning");
            log_warn("MAC learning disabled, but vNIC '$iface' has no static MAC to add to forwarding DB!")
                if !$learning;
            next;
        }

        my $bridge = $net->{bridge};
        if ($have_sdn) {
            PVE::Network::SDN::Zones::add_bridge_fdb($iface, $mac, $bridge, $net->{firewall});
        } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
            PVE::Network::add_bridge_fdb($iface, $mac, $net->{firewall});
        }
    }
}
8506
# Remove the MAC address of every 'netN' device in $conf from the forwarding DB of its bridge,
# using tap interface name "tap<vmid>i<N>".
#
# Devices that cannot be parsed or have no MAC address are skipped. With SDN available the SDN
# helper is used, otherwise only bridges with a /sys/class/net/<bridge>/bridge directory are
# handled.
sub del_nets_bridge_fdb {
    my ($conf, $vmid) = @_;

    for my $opt (keys %$conf) {
        next unless $opt =~ m/^net(\d+)$/;
        my $iface = "tap${vmid}i$1";

        my $net = parse_net($conf->{$opt});
        next if !$net;

        my $mac = $net->{macaddr};
        next if !$mac;

        my $bridge = $net->{bridge};
        if ($have_sdn) {
            PVE::Network::SDN::Zones::del_bridge_fdb($iface, $mac, $bridge, $net->{firewall});
        } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
            PVE::Network::del_bridge_fdb($iface, $mac, $net->{firewall});
        }
    }
}
8525
8526 1;