]> git.proxmox.com Git - qemu-server.git/blob - PVE/QemuServer.pm
migration: nbd export: switch away from deprecated QMP command
[qemu-server.git] / PVE / QemuServer.pm
1 package PVE::QemuServer;
2
3 use strict;
4 use warnings;
5
6 use Cwd 'abs_path';
7 use Digest::SHA;
8 use Fcntl ':flock';
9 use Fcntl;
10 use File::Basename;
11 use File::Copy qw(copy);
12 use File::Path;
13 use File::stat;
14 use Getopt::Long;
15 use IO::Dir;
16 use IO::File;
17 use IO::Handle;
18 use IO::Select;
19 use IO::Socket::UNIX;
20 use IPC::Open3;
21 use JSON;
22 use List::Util qw(first);
23 use MIME::Base64;
24 use POSIX;
25 use Storable qw(dclone);
26 use Time::HiRes qw(gettimeofday usleep);
27 use URI::Escape;
28 use UUID;
29
30 use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file);
31 use PVE::CGroup;
32 use PVE::CpuSet;
33 use PVE::DataCenterConfig;
34 use PVE::Exception qw(raise raise_param_exc);
35 use PVE::Format qw(render_duration render_bytes);
36 use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
37 use PVE::INotify;
38 use PVE::JSONSchema qw(get_standard_option parse_property_string);
39 use PVE::ProcFSTools;
40 use PVE::PBSClient;
41 use PVE::RESTEnvironment qw(log_warn);
42 use PVE::RPCEnvironment;
43 use PVE::Storage;
44 use PVE::SysFSTools;
45 use PVE::Systemd;
46 use PVE::Tools qw(run_command file_read_firstline file_get_contents dir_glob_foreach get_host_arch $IPV6RE);
47
48 use PVE::QMPClient;
49 use PVE::QemuConfig;
50 use PVE::QemuServer::Helpers qw(min_version config_aware_timeout windows_version);
51 use PVE::QemuServer::Cloudinit;
52 use PVE::QemuServer::CGroup;
53 use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options);
54 use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom drive_is_read_only parse_drive print_drive);
55 use PVE::QemuServer::Machine;
56 use PVE::QemuServer::Memory;
57 use PVE::QemuServer::Monitor qw(mon_cmd);
58 use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
59 use PVE::QemuServer::USB qw(parse_usb_device);
60
# Optional SDN support: the flag is 1 when PVE::Network::SDN::Zones could be
# loaded and stays undefined when the require failed.
my $have_sdn = eval {
    require PVE::Network::SDN::Zones;
    1;
};
66
# Directory holding the EDK2 firmware images.
my $EDK2_FW_BASE = '/usr/share/pve-edk2-firmware/';

# Firmware image pairs, indexed by architecture and then by firmware variant.
# Every entry is a two element list of full paths: [ code image, vars image ].
my $OVMF = do {
    my %image_names = (
        x86_64 => {
            '4m-no-smm'    => [qw(OVMF_CODE_4M.fd OVMF_VARS_4M.fd)],
            '4m-no-smm-ms' => [qw(OVMF_CODE_4M.fd OVMF_VARS_4M.ms.fd)],
            '4m'           => [qw(OVMF_CODE_4M.secboot.fd OVMF_VARS_4M.fd)],
            '4m-ms'        => [qw(OVMF_CODE_4M.secboot.fd OVMF_VARS_4M.ms.fd)],
            default        => [qw(OVMF_CODE.fd OVMF_VARS.fd)],
        },
        aarch64 => {
            default => [qw(AAVMF_CODE.fd AAVMF_VARS.fd)],
        },
    );

    my $with_paths = {};
    for my $arch (keys %image_names) {
        my $variants = $image_names{$arch};
        for my $variant (keys %$variants) {
            # same "<base>/<file>" interpolation as a literal path list would use
            $with_paths->{$arch}->{$variant} = [ map { "$EDK2_FW_BASE/$_" } @{ $variants->{$variant} } ];
        }
    }
    $with_paths;
};
98
# CPU information as returned by PVE::ProcFSTools, read once when this module is loaded.
my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();

# Note about locking: we use flock on the config file to protect against concurrent actions.
# Additionally, we have a 'lock' setting in the config file. This can be set to 'migrate',
# 'backup', 'snapshot' or 'rollback'. Most actions are not allowed when such a lock is set.
# But you can ignore this kind of lock with the --skiplock flag.

# Register the parser and writer callbacks for files below '/qemu-server/'.
cfs_register_file('/qemu-server/', \&parse_vm_config, \&write_vm_config);
111
# Standard options defined by this module, registered in the order listed:
# the state URI and the QEMU machine type.
for my $std_option (
    [
        'pve-qm-stateuri',
        {
            type => 'string',
            description => "Some command save/restore state from this location.",
            maxLength => 128,
            optional => 1,
        },
    ],
    [
        'pve-qemu-machine',
        {
            type => 'string',
            description => "Specifies the QEMU machine type.",
            pattern => '(pc|pc(-i440fx)?-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|q35|pc-q35-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|virt(?:-\d+(\.\d+)+)?(\+pve\d+)?)',
            maxLength => 40,
            optional => 1,
        },
    ],
) {
    my ($option_name, $option_schema) = @$std_option;
    PVE::JSONSchema::register_standard_option($option_name, $option_schema);
}
126
# FIXME: remove in favor of just using the INotify one, it's cached there exactly the same way
my $nodename_cache;

# Returns the value of PVE::INotify::nodename(). INotify is only asked while
# $nodename_cache holds no defined value; afterwards the cached value is returned.
sub nodename {
    $nodename_cache = PVE::INotify::nodename() if !defined($nodename_cache);
    return $nodename_cache;
}
133
# Property-string schema registered as format 'pve-qm-watchdog'.
# 'model' is the default key, so it may be given without a 'model=' prefix.
my $watchdog_fmt = {
    model => {
        description => "Watchdog type to emulate.",
        type => 'string',
        enum => ['i6300esb', 'ib700'],
        default => 'i6300esb',
        default_key => 1,
        optional => 1,
    },
    action => {
        description => "The action to perform if after activation the guest fails to poll the watchdog in time.",
        type => 'string',
        enum => ['reset', 'shutdown', 'poweroff', 'pause', 'debug', 'none'],
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-qm-watchdog', $watchdog_fmt);
151
# Property-string schema describing the QEMU Guest Agent settings.
# 'enabled' is the default key of the property string.
my $agent_fmt = {
    enabled => {
        type => 'boolean',
        description => "Enable/disable communication with a QEMU Guest Agent (QGA) running in the VM.",
        default_key => 1,
        default => 0,
    },
    fstrim_cloned_disks => {
        type => 'boolean',
        description => "Run fstrim after moving a disk or migrating the VM.",
        default => 0,
        optional => 1,
    },
    type => {
        type => 'string',
        description => "Select the agent type",
        enum => ['virtio', 'isa'],
        default => 'virtio',
        optional => 1,
    },
};
173
# Property-string schema describing the VGA settings.
# 'type' is the default key of the property string.
my $vga_fmt = {
    type => {
        type => 'string',
        description => "Select the VGA type.",
        enum => [
            qw(cirrus),
            qw(qxl qxl2 qxl3 qxl4),
            qw(none),
            qw(serial0 serial1 serial2 serial3),
            qw(std virtio virtio-gl vmware),
        ],
        default => 'std',
        default_key => 1,
        optional => 1,
    },
    memory => {
        type => 'integer',
        description => "Sets the VGA memory (in MiB). Has no effect with serial display.",
        minimum => 4,
        maximum => 512,
        optional => 1,
    },
};
191
# Property-string schema describing the inter-VM shared memory settings.
my $ivshmem_fmt = {
    size => {
        description => "The size of the file in MB.",
        type => 'integer',
        minimum => 1,
    },
    name => {
        description => "The name of the file. Will be prefixed with 'pve-shm-'. "
            . "Default is the VMID. Will be deleted when the VM is stopped.",
        format_description => 'string',
        type => 'string',
        pattern => '[a-zA-Z0-9\-]+',
        optional => 1,
    },
};
206
# Property-string schema describing an audio device and its backend driver.
my $audio_fmt = {
    device => {
        description => "Configure an audio device.",
        type => 'string',
        enum => ['ich9-intel-hda', 'intel-hda', 'AC97'],
    },
    driver => {
        description => "Driver backend for the audio device.",
        type => 'string',
        enum => [qw(spice none)],
        default => 'spice',
        optional => 1,
    },
};
221
# Property-string schema describing the optional SPICE enhancements.
my $spice_enhancements_fmt = {
    foldersharing => {
        description => "Enable folder sharing via SPICE. "
            . "Needs Spice-WebDAV daemon installed in the VM.",
        type => 'boolean',
        default => '0',
        optional => 1,
    },
    videostreaming => {
        description => "Enable video streaming. "
            . "Uses compression for detected video streams.",
        type => 'string',
        enum => [qw(off all filter)],
        default => 'off',
        optional => 1,
    },
};
237
# Property-string schema describing the VirtIO RNG settings.
# 'source' is the default key of the property string.
my $rng_fmt = {
    source => {
        description => "The file on the host to gather entropy from. "
            . "In most cases '/dev/urandom' should be preferred over '/dev/random' to avoid "
            . "entropy-starvation issues on the host. "
            . "Using urandom does *not* decrease security in any meaningful way, as it's still "
            . "seeded from real entropy, and the bytes provided will most likely be mixed with "
            . "real entropy on the guest as well. "
            . "'/dev/hwrng' can be used to pass through a hardware RNG from the host.",
        type => 'string',
        enum => [qw(/dev/urandom /dev/random /dev/hwrng)],
        default_key => 1,
    },
    max_bytes => {
        description => "Maximum bytes of entropy allowed to get injected into the guest every "
            . "'period' milliseconds. "
            . "Prefer a lower value when using '/dev/random' as source. "
            . "Use `0` to disable limiting (potentially dangerous!).",
        type => 'integer',
        optional => 1,

        # default is 1 KiB/s, provides enough entropy to the guest to avoid boot-starvation issues
        # (e.g. systemd etc...) while allowing no chance of overwhelming the host, provided we're
        # reading from /dev/urandom
        default => 1024,
    },
    period => {
        description => "Every 'period' milliseconds the entropy-injection quota is reset, "
            . "allowing the guest to retrieve another 'max_bytes' of entropy.",
        type => 'integer',
        default => 1000,
        optional => 1,
    },
};
270
# Property-string schema describing the guest meta information
# (creation time and QEMU version at creation).
my $meta_info_fmt = {
    ctime => {
        description => "The guest creation timestamp as UNIX epoch time",
        type => 'integer',
        minimum => 0,
        optional => 1,
    },
    'creation-qemu' => {
        description => "The QEMU (machine) version from the time this VM was created.",
        type => 'string',
        pattern => '\d+(\.\d+)+',
        optional => 1,
    },
};
285
# Schema of the VM configuration options. Each key is an option name and each
# value is the property description used to validate and document that option.
# Indexed options (numaN, netN, ...) and the cloud-init options are added to
# this hash further down in the file.
my $confdesc = {
    onboot => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether a VM will be started during system bootup.",
        default => 0,
    },
    autostart => {
        optional => 1,
        type => 'boolean',
        description => "Automatic restart after crash (currently ignored).",
        default => 0,
    },
    hotplug => {
        optional => 1,
        type => 'string',
        format => 'pve-hotplug-features',
        description => "Selectively enable hotplug features. This is a comma separated list of"
            ." hotplug features: 'network', 'disk', 'cpu', 'memory', 'usb' and 'cloudinit'. Use '0' to disable"
            ." hotplug completely. Using '1' as value is an alias for the default `network,disk,usb`."
            ." USB hotplugging is possible for guests with machine version >= 7.1 and ostype l26 or"
            ." windows > 7.",
        default => 'network,disk,usb',
    },
    reboot => {
        optional => 1,
        type => 'boolean',
        description => "Allow reboot. If set to '0' the VM exits on reboot.",
        default => 1,
    },
    lock => {
        optional => 1,
        type => 'string',
        description => "Lock/unlock the VM.",
        enum => [qw(backup clone create migrate rollback snapshot snapshot-delete suspending suspended)],
    },
    cpulimit => {
        optional => 1,
        type => 'number',
        description => "Limit of CPU usage.",
        verbose_description => "Limit of CPU usage.\n\nNOTE: If the computer has 2 CPUs, it has"
            ." total of '2' CPU time. Value '0' indicates no CPU limit.",
        minimum => 0,
        maximum => 128,
        default => 0,
    },
    cpuunits => {
        optional => 1,
        type => 'integer',
        description => "CPU weight for a VM, will be clamped to [1, 10000] in cgroup v2.",
        verbose_description => "CPU weight for a VM. Argument is used in the kernel fair scheduler."
            ." The larger the number is, the more CPU time this VM gets. Number is relative to"
            ." weights of all the other running VMs.",
        minimum => 1,
        maximum => 262144,
        # descriptive text only, the effective default depends on the cgroup version
        default => 'cgroup v1: 1024, cgroup v2: 100',
    },
    memory => {
        optional => 1,
        type => 'integer',
        description => "Amount of RAM for the VM in MB. This is the maximum available memory when"
            ." you use the balloon device.",
        minimum => 16,
        default => 512,
    },
    balloon => {
        optional => 1,
        type => 'integer',
        description => "Amount of target RAM for the VM in MB. Using zero disables the balloon driver.",
        minimum => 0,
    },
    shares => {
        optional => 1,
        type => 'integer',
        description => "Amount of memory shares for auto-ballooning. The larger the number is, the"
            ." more memory this VM gets. Number is relative to weights of all other running VMs."
            ." Using zero disables auto-ballooning. Auto-ballooning is done by pvestatd.",
        minimum => 0,
        maximum => 50000,
        default => 1000,
    },
    keyboard => {
        optional => 1,
        type => 'string',
        description => "Keyboard layout for VNC server. This option is generally not required and"
            ." is often better handled from within the guest OS.",
        enum => PVE::Tools::kvmkeymaplist(),
        default => undef,
    },
    name => {
        optional => 1,
        type => 'string',
        format => 'dns-name',
        description => "Set a name for the VM. Only used on the configuration web interface.",
    },
    scsihw => {
        optional => 1,
        type => 'string',
        description => "SCSI controller model",
        enum => [qw(lsi lsi53c810 virtio-scsi-pci virtio-scsi-single megasas pvscsi)],
        default => 'lsi',
    },
    description => {
        optional => 1,
        type => 'string',
        description => "Description for the VM. Shown in the web-interface VM's summary."
            ." This is saved as comment inside the configuration file.",
        maxLength => 1024 * 8,
    },
    ostype => {
        optional => 1,
        type => 'string',
        enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 win10 win11 l24 l26 solaris)],
        description => "Specify guest operating system.",
        verbose_description => <<EODESC,
Specify guest operating system. This is used to enable special
optimization/features for specific operating systems:

[horizontal]
other;; unspecified OS
wxp;; Microsoft Windows XP
w2k;; Microsoft Windows 2000
w2k3;; Microsoft Windows 2003
w2k8;; Microsoft Windows 2008
wvista;; Microsoft Windows Vista
win7;; Microsoft Windows 7
win8;; Microsoft Windows 8/2012/2012r2
win10;; Microsoft Windows 10/2016/2019
win11;; Microsoft Windows 11/2022
l24;; Linux 2.4 Kernel
l26;; Linux 2.6 - 5.X Kernel
solaris;; Solaris/OpenSolaris/OpenIndiana kernel
EODESC
    },
    boot => {
        optional => 1,
        type => 'string',
        format => 'pve-qm-boot',
        description => "Specify guest boot order. Use the 'order=' sub-property as usage with no"
            ." key or 'legacy=' is deprecated.",
    },
    bootdisk => {
        optional => 1,
        type => 'string',
        format => 'pve-qm-bootdisk',
        description => "Enable booting from specified disk. Deprecated: Use 'boot: order=foo;bar' instead.",
        pattern => '(ide|sata|scsi|virtio)\d+',
    },
    smp => {
        optional => 1,
        type => 'integer',
        description => "The number of CPUs. Please use option -sockets instead.",
        minimum => 1,
        default => 1,
    },
    sockets => {
        optional => 1,
        type => 'integer',
        description => "The number of CPU sockets.",
        minimum => 1,
        default => 1,
    },
    cores => {
        optional => 1,
        type => 'integer',
        description => "The number of cores per socket.",
        minimum => 1,
        default => 1,
    },
    numa => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable NUMA.",
        default => 0,
    },
    hugepages => {
        optional => 1,
        type => 'string',
        description => "Enable/disable hugepages memory.",
        enum => [qw(any 2 1024)],
    },
    keephugepages => {
        optional => 1,
        type => 'boolean',
        default => 0,
        description => "Use together with hugepages. If enabled, hugepages will not be deleted"
            ." after VM shutdown and can be used for subsequent starts.",
    },
    vcpus => {
        optional => 1,
        type => 'integer',
        description => "Number of hotplugged vcpus.",
        minimum => 1,
        default => 0,
    },
    acpi => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable ACPI.",
        default => 1,
    },
    agent => {
        optional => 1,
        description => "Enable/disable communication with the QEMU Guest Agent and its properties.",
        type => 'string',
        format => $agent_fmt,
    },
    kvm => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable KVM hardware virtualization.",
        default => 1,
    },
    tdf => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable time drift fix.",
        default => 0,
    },
    localtime => {
        optional => 1,
        type => 'boolean',
        description => "Set the real time clock (RTC) to local time. This is enabled by default if"
            ." the `ostype` indicates a Microsoft Windows OS.",
    },
    freeze => {
        optional => 1,
        type => 'boolean',
        description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
    },
    vga => {
        optional => 1,
        type => 'string',
        format => $vga_fmt,
        description => "Configure the VGA hardware.",
        verbose_description => "Configure the VGA Hardware. If you want to use high resolution"
            ." modes (>= 1280x1024x16) you may need to increase the vga memory option. Since QEMU"
            ." 2.9 the default VGA display type is 'std' for all OS types besides some Windows"
            ." versions (XP and older) which use 'cirrus'. The 'qxl' option enables the SPICE"
            ." display server. For win* OS you can select how many independent displays you want,"
            ." Linux guests can add displays themselves.\nYou can also run without any graphic card,"
            ." using a serial device as terminal.",
    },
    watchdog => {
        optional => 1,
        type => 'string',
        format => 'pve-qm-watchdog',
        description => "Create a virtual hardware watchdog device.",
        verbose_description => "Create a virtual hardware watchdog device. Once enabled (by a guest"
            ." action), the watchdog must be periodically polled by an agent inside the guest or"
            ." else the watchdog will reset the guest (or execute the respective action specified)",
    },
    startdate => {
        optional => 1,
        type => 'string',
        typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
        description => "Set the initial date of the real time clock. Valid format for date are:"
            ." 'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
        pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
        default => 'now',
    },
    startup => get_standard_option('pve-startup-order'),
    template => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable Template.",
        default => 0,
    },
    args => {
        optional => 1,
        type => 'string',
        description => "Arbitrary arguments passed to kvm.",
        verbose_description => <<EODESCR,
Arbitrary arguments passed to kvm, for example:

args: -no-reboot -no-hpet

NOTE: this option is for experts only.
EODESCR
    },
    tablet => {
        optional => 1,
        type => 'boolean',
        default => 1,
        description => "Enable/disable the USB tablet device.",
        verbose_description => "Enable/disable the USB tablet device. This device is usually needed"
            ." to allow absolute mouse positioning with VNC. Else the mouse runs out of sync with"
            ." normal VNC clients. If you're running lots of console-only guests on one host, you"
            ." may consider disabling this to save some context switches. This is turned off by"
            ." default if you use spice (`qm set <vmid> --vga qxl`).",
    },
    migrate_speed => {
        optional => 1,
        type => 'integer',
        description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
        minimum => 0,
        default => 0,
    },
    migrate_downtime => {
        optional => 1,
        type => 'number',
        description => "Set maximum tolerated downtime (in seconds) for migrations.",
        minimum => 0,
        default => 0.1,
    },
    cdrom => {
        optional => 1,
        type => 'string',
        format => 'pve-qm-ide',
        typetext => '<volume>',
        description => "This is an alias for option -ide2",
    },
    cpu => {
        optional => 1,
        description => "Emulated CPU type.",
        type => 'string',
        format => 'pve-vm-cpu-conf',
    },
    parent => get_standard_option('pve-snapshot-name', {
        optional => 1,
        description => "Parent snapshot name. This is used internally, and should not be modified.",
    }),
    snaptime => {
        optional => 1,
        description => "Timestamp for snapshots.",
        type => 'integer',
        minimum => 0,
    },
    vmstate => {
        optional => 1,
        type => 'string',
        format => 'pve-volume-id',
        description => "Reference to a volume which stores the VM state. This is used internally"
            ." for snapshots.",
    },
    vmstatestorage => get_standard_option('pve-storage-id', {
        description => "Default storage for VM state volumes/files.",
        optional => 1,
    }),
    runningmachine => get_standard_option('pve-qemu-machine', {
        description => "Specifies the QEMU machine type of the running vm. This is used internally"
            ." for snapshots.",
    }),
    runningcpu => {
        description => "Specifies the QEMU '-cpu' parameter of the running vm. This is used"
            ." internally for snapshots.",
        optional => 1,
        type => 'string',
        pattern => $PVE::QemuServer::CPUConfig::qemu_cmdline_cpu_re,
        format_description => 'QEMU -cpu parameter',
    },
    machine => get_standard_option('pve-qemu-machine'),
    arch => {
        description => "Virtual processor architecture. Defaults to the host.",
        optional => 1,
        type => 'string',
        enum => [qw(x86_64 aarch64)],
    },
    smbios1 => {
        description => "Specify SMBIOS type 1 fields.",
        type => 'string',
        format => 'pve-qm-smbios1',
        maxLength => 512,
        optional => 1,
    },
    protection => {
        optional => 1,
        type => 'boolean',
        description => "Sets the protection flag of the VM. This will disable the remove VM and"
            ." remove disk operations.",
        default => 0,
    },
    bios => {
        optional => 1,
        type => 'string',
        enum => [ qw(seabios ovmf) ],
        description => "Select BIOS implementation.",
        default => 'seabios',
    },
    vmgenid => {
        type => 'string',
        pattern => '(?:[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}|[01])',
        format_description => 'UUID',
        description => "Set VM Generation ID. Use '1' to autogenerate on create or update, pass '0'"
            ." to disable explicitly.",
        verbose_description => "The VM generation ID (vmgenid) device exposes a 128-bit integer"
            ." value identifier to the guest OS. This allows to notify the guest operating system"
            ." when the virtual machine is executed with a different configuration (e.g. snapshot"
            ." execution or creation from a template). The guest operating system notices the"
            ." change, and is then able to react as appropriate by marking its copies of"
            ." distributed databases as dirty, re-initializing its random number generator, etc.\n"
            ."Note that auto-creation only works when done through API/CLI create or update methods"
            .", but not when manually editing the config file.",
        default => "1 (autogenerated)",
        optional => 1,
    },
    hookscript => {
        type => 'string',
        format => 'pve-volume-id',
        optional => 1,
        description => "Script that will be executed during various steps in the vms lifetime.",
    },
    ivshmem => {
        type => 'string',
        format => $ivshmem_fmt,
        description => "Inter-VM shared memory. Useful for direct communication between VMs, or to"
            ." the host.",
        optional => 1,
    },
    audio0 => {
        type => 'string',
        format => $audio_fmt,
        description => "Configure an audio device, useful in combination with QXL/Spice.",
        optional => 1,
    },
    spice_enhancements => {
        type => 'string',
        format => $spice_enhancements_fmt,
        description => "Configure additional enhancements for SPICE.",
        optional => 1,
    },
    tags => {
        type => 'string',
        format => 'pve-tag-list',
        description => 'Tags of the VM. This is only meta information.',
        optional => 1,
    },
    rng0 => {
        type => 'string',
        format => $rng_fmt,
        description => "Configure a VirtIO-based Random Number Generator.",
        optional => 1,
    },
    meta => {
        type => 'string',
        format => $meta_info_fmt,
        description => "Some (read-only) meta-information about this guest.",
        optional => 1,
    },
    affinity => {
        type => 'string',
        format => 'pve-cpuset',
        description => "List of host cores used to execute guest processes, for example: 0,5,8-11",
        optional => 1,
    },
};
721
# Property-string schema for the 'cicustom' option: one optional volume per
# cloud-init data section (meta, network, user, vendor) that replaces the
# automatically generated file of that section.
my $cicustom_fmt = {
    meta => {
        type => 'string',
        optional => 1,
        # Two properly quoted strings joined with '.', so the description carries no
        # stray double quote, newline or '.' at the seam.
        description => 'Specify a custom file containing all meta data passed to the VM via'
            .' cloud-init. This is provider specific meaning configdrive2 and nocloud differ.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    network => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all network data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    user => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all user data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    vendor => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all vendor data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
};
# Make the custom-file schema available as format 'pve-qm-cicustom'; the
# 'cicustom' option below refers to it by that name.
PVE::JSONSchema::register_format('pve-qm-cicustom', $cicustom_fmt);

# Schema of the cloud-init related VM options. These entries are merged into
# $confdesc later in the file.
my $confdesc_cloudinit = {
    citype => {
        optional => 1,
        type => 'string',
        # the parenthesis around `ostype` is closed before the next sentence starts
        description => 'Specifies the cloud-init configuration format. The default depends on the'
            .' configured operating system type (`ostype`). We use the `nocloud` format for Linux,'
            .' and `configdrive2` for windows.',
        enum => ['configdrive2', 'nocloud', 'opennebula'],
    },
    ciuser => {
        optional => 1,
        type => 'string',
        description => "cloud-init: User name to change ssh keys and password for instead of the"
            ." image's configured default user.",
    },
    cipassword => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Password to assign the user. Using this is generally not'
            .' recommended. Use ssh keys instead. Also note that older cloud-init versions do not'
            .' support hashed passwords.',
    },
    cicustom => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Specify custom files to replace the automatically generated'
            .' ones at start.',
        format => 'pve-qm-cicustom',
    },
    searchdomain => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Sets DNS search domains for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    nameserver => {
        optional => 1,
        type => 'string',
        format => 'address-list',
        description => 'cloud-init: Sets DNS server IP address for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    sshkeys => {
        optional => 1,
        type => 'string',
        format => 'urlencoded',
        description => "cloud-init: Setup public SSH keys (one key per line, OpenSSH format).",
    },
};
808
809 # what about other qemu settings ?
810 #cpu => 'string',
811 #machine => 'string',
812 #fda => 'file',
813 #fdb => 'file',
814 #mtdblock => 'file',
815 #sd => 'file',
816 #pflash => 'file',
817 #snapshot => 'bool',
818 #bootp => 'file',
819 ##tftp => 'dir',
820 ##smb => 'dir',
821 #kernel => 'file',
822 #append => 'string',
823 #initrd => 'file',
824 ##soundhw => 'string',
825
# Register every option present in $confdesc at this point as the standard
# option 'pve-qm-<option name>'.
for my $option_name (keys %$confdesc) {
    PVE::JSONSchema::register_standard_option("pve-qm-$option_name", $confdesc->{$option_name});
}
829
# Upper bounds for the number of indexed devices/options of each kind.
my $MAX_USB_DEVICES = 14;
my $MAX_NETS = 32;
my $MAX_SERIAL_PORTS = 4;
my $MAX_PARALLEL_PORTS = 3;
my $MAX_NUMA = 8;

# Property-string schema of a single NUMA node, registered as format 'pve-qm-numanode'.
# 'cpus' and 'hostnodes' are ';'-separated lists of ids or id ranges.
my $numa_fmt = {
    cpus => {
        description => "CPUs accessing this NUMA node.",
        format_description => "id[-id];...",
        type => "string",
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
    },
    memory => {
        description => "Amount of memory this NUMA node provides.",
        type => "number",
        optional => 1,
    },
    hostnodes => {
        description => "Host NUMA nodes to use.",
        format_description => "id[-id];...",
        type => "string",
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        optional => 1,
    },
    policy => {
        description => "NUMA allocation policy.",
        type => 'string',
        enum => ['preferred', 'bind', 'interleave'],
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-qm-numanode', $numa_fmt);

# Option description shared by all numaN config keys.
my $numadesc = {
    description => "NUMA topology.",
    type => 'string',
    format => $numa_fmt,
    optional => 1,
};
PVE::JSONSchema::register_standard_option("pve-qm-numanode", $numadesc);

# numa0 .. numa<$MAX_NUMA - 1> all point at the same description
$confdesc->{"numa$_"} = $numadesc for 0 .. $MAX_NUMA - 1;
873
# Network card models accepted for the 'model' property of a net device.
my $nic_model_list = [
    qw(e1000 e1000-82540em e1000-82544gc e1000-82545em e1000e),
    qw(i82551 i82557b i82559er),
    qw(ne2k_isa ne2k_pci),
    qw(pcnet rtl8139 virtio vmxnet3),
];

# The same models as one space separated string, in string sort order.
my $nic_model_list_txt = join(' ', sort { $a cmp $b } @{$nic_model_list});
891
# Long description of the 'bridge' property, used in $net_fmt below.
my $net_fmt_bridge_descr = <<__EOD__;
Bridge to attach the network device to. The Proxmox VE standard bridge
is called 'vmbr0'.

If you do not specify a bridge, we create a kvm user (NATed) network
device, which provides DHCP and DNS services. The following addresses
are used:

10.0.2.2 Gateway
10.0.2.3 DNS Server
10.0.2.4 SMB Server

The DHCP server assigns addresses to the guest starting from 10.0.2.15.
__EOD__

# Property-string schema of a network device. 'model' is the default key.
my $net_fmt = {
    macaddr => get_standard_option('mac-addr', {
        description => "MAC address. That address must be unique within your network. This is"
            ." automatically generated if not specified.",
    }),
    model => {
        type => 'string',
        description => "Network Card Model. The 'virtio' model provides the best performance with"
            ." very low CPU overhead. If your guest does not support this driver, it is usually"
            ." best to use 'e1000'.",
        enum => $nic_model_list,
        default_key => 1,
    },
    # Every model name is also a key that aliases 'model' (keyAlias) and 'macaddr' (alias).
    # The parentheses make sure the inner braces are parsed as a block returning a pair.
    (map { ($_ => { keyAlias => 'model', alias => 'macaddr' }) } @$nic_model_list),
    bridge => get_standard_option('pve-bridge-id', {
        description => $net_fmt_bridge_descr,
        optional => 1,
    }),
    queues => {
        type => 'integer',
        minimum => 0,
        maximum => 64,
        description => 'Number of packet queues to be used on the device.',
        optional => 1,
    },
    rate => {
        type => 'number',
        minimum => 0,
        description => "Rate limit in mbps (megabytes per second) as floating point number.",
        optional => 1,
    },
    tag => {
        type => 'integer',
        minimum => 1,
        maximum => 4094,
        description => 'VLAN tag to apply to packets on this interface.',
        optional => 1,
    },
    trunks => {
        type => 'string',
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        description => 'VLAN trunks to pass through this interface.',
        format_description => 'vlanid[;vlanid...]',
        optional => 1,
    },
    firewall => {
        type => 'boolean',
        description => 'Whether this interface should be protected by the firewall.',
        optional => 1,
    },
    link_down => {
        type => 'boolean',
        description => 'Whether this interface should be disconnected (like pulling the plug).',
        optional => 1,
    },
    mtu => {
        type => 'integer',
        minimum => 1,
        maximum => 65520,
        description => "Force MTU, for VirtIO only. Set to '1' to use the bridge MTU",
        optional => 1,
    },
};

# Option description shared by all netN config keys.
my $netdesc = {
    optional => 1,
    type => 'string',
    format => $net_fmt,
    description => "Specify network devices.",
};

PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
975
# Property-string schema of a cloud-init IP configuration, registered as format
# 'pve-qm-ipconfig'. A gateway requires the address of the same IP version.
my $ipconfig_fmt = {
    ip => {
        type => 'string',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        description => 'IPv4 address in CIDR format.',
        optional => 1,
        default => 'dhcp',
    },
    gw => {
        type => 'string',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        description => 'Default gateway for IPv4 traffic.',
        optional => 1,
        requires => 'ip',
    },
    ip6 => {
        type => 'string',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        description => 'IPv6 address in CIDR format.',
        optional => 1,
        default => 'dhcp',
    },
    gw6 => {
        type => 'string',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        description => 'Default gateway for IPv6 traffic.',
        optional => 1,
        requires => 'ip6',
    },
};
PVE::JSONSchema::register_format('pve-qm-ipconfig', $ipconfig_fmt);

# Option description shared by all ipconfigN config keys.
my $ipconfigdesc = {
    optional => 1,
    type => 'string',
    format => 'pve-qm-ipconfig',
    description => <<'EODESCR',
cloud-init: Specify IP addresses and gateways for the corresponding interface.

IP addresses use CIDR notation, gateways are optional but need an IP of the same type specified.

The special string 'dhcp' can be used for IP addresses to use DHCP, in which case no explicit
gateway should be provided.
For IPv6 the special string 'auto' can be used to use stateless autoconfiguration. This requires
cloud-init 19.4 or newer.

If cloud-init is enabled and neither an IPv4 nor an IPv6 address is specified, it defaults to using
dhcp on IPv4.
EODESCR
};

# The standard option must carry the ipconfig description. Registering the net
# device description here would make 'pve-qm-ipconfig' validate and document
# values as network devices.
PVE::JSONSchema::register_standard_option("pve-qm-ipconfig", $ipconfigdesc);
1029
# Register the per-index schema entries: 'netN' in the main config description
# and 'ipconfigN' in the cloud-init description.
for my $i (0 .. $MAX_NETS - 1) {
    $confdesc->{"net$i"} = $netdesc;
    $confdesc_cloudinit->{"ipconfig$i"} = $ipconfigdesc;
}

# Copy every cloud-init option into the main config description.
$confdesc->{$_} = $confdesc_cloudinit->{$_} for keys %$confdesc_cloudinit;
1038
PVE::JSONSchema::register_format('pve-cpuset', \&pve_verify_cpuset);
# Format verifier for 'pve-cpuset'.
#
# Parses $set_text with PVE::CpuSet::parse_cpuset() and returns the short
# string form of the resulting set. On a parse failure it returns nothing when
# $noerr is set and dies otherwise.
sub pve_verify_cpuset {
    my ($set_text, $noerr) = @_;

    my (undef, $members) = eval { PVE::CpuSet::parse_cpuset($set_text) };
    if (my $err = $@) {
        die "unable to parse cpuset option\n" if !$noerr;
        return;
    }

    my $cpuset = PVE::CpuSet->new($members);
    return $cpuset->short_string();
}
1052
PVE::JSONSchema::register_format('pve-volume-id-or-qm-path', \&verify_volume_id_or_qm_path);
# Format verifier accepting the keywords 'none' and 'cdrom' in addition to
# everything verify_volume_id_or_absolute_path() accepts.
sub verify_volume_id_or_qm_path {
    my ($volid, $noerr) = @_;

    for my $keyword (qw(none cdrom)) {
        return $volid if $volid eq $keyword;
    }

    return verify_volume_id_or_absolute_path($volid, $noerr);
}
1061
PVE::JSONSchema::register_format('pve-volume-id-or-absolute-path', \&verify_volume_id_or_absolute_path);
# Format verifier: values starting with '/' are returned unchanged, anything
# else has to pass the 'pve-volume-id' format check. On a failed check it
# returns nothing when $noerr is set and re-throws the error otherwise.
sub verify_volume_id_or_absolute_path {
    my ($volid, $noerr) = @_;

    # absolute paths are accepted as they are
    return $volid if $volid =~ m|^/|;

    my $checked = eval { PVE::JSONSchema::check_format('pve-volume-id', $volid, '') };
    if (my $err = $@) {
        die $err if !$noerr;
        return;
    }

    return $checked;
}
1075
# Property-string format of the 'usbN' options. 'host' is the default key and
# names the host device/port (or 'spice'); 'usb3' is a legacy hint.
my $usb_fmt = {
    host => {
        default_key => 1,
        type => 'string', format => 'pve-qm-usb-device',
        format_description => 'HOSTUSBDEVICE|spice',
        description => <<EODESCR,
The Host USB device or port or the value 'spice'. HOSTUSBDEVICE syntax is:

'bus-port(.port)*' (decimal numbers) or
'vendor_id:product_id' (hexadecimal numbers) or
'spice'

You can use the 'lsusb -t' command to list existing usb devices.

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

The value 'spice' can be used to add a usb redirection device for spice.
EODESCR
    },
    usb3 => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether the given host option is a USB3 device or port."
            ." For modern guests (machine version >= 7.1 and ostype l26 and windows > 7), this flag"
            ." is irrelevant (all devices are plugged into a xhci controller).",
        default => 0,
    },
};

# Schema entry for a single 'usbN' option.
my $usbdesc = {
    optional => 1,
    type => 'string', format => $usb_fmt,
    description => "Configure an USB device (n is 0 to 4, for machine version >= 7.1 and ostype"
        ." l26 or windows > 7, n can be up to 14).",
};
PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc);
1113
# Schema entry for a single 'serialN' option: either a host device below /dev
# or the keyword 'socket'.
my $serialdesc = {
    type => 'string',
    pattern => '(/dev/.+|socket)',
    optional => 1,
    description => "Create a serial device inside the VM (n is 0 to 3)",
    verbose_description => <<EODESCR,
Create a serial device inside the VM (n is 0 to 3), and pass through a
host serial device (i.e. /dev/ttyS0), or create a unix socket on the
host side (use 'qm terminal' to open a terminal connection).

NOTE: If you pass through a host serial device, it is no longer possible to migrate such machines -
use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1130
# Schema entry for a single 'parallelN' option: a host parallel port device
# node (/dev/parportN or /dev/usb/lpN).
my $paralleldesc = {
    type => 'string',
    pattern => '/dev/parport\d+|/dev/usb/lp\d+',
    optional => 1,
    description => "Map host parallel devices (n is 0 to 2).",
    verbose_description => <<EODESCR,
Map host parallel devices (n is 0 to 2).

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1145
# Register the indexed device options in the main config description.
$confdesc->{"parallel$_"} = $paralleldesc for 0 .. $MAX_PARALLEL_PORTS - 1;

$confdesc->{"serial$_"} = $serialdesc for 0 .. $MAX_SERIAL_PORTS - 1;

$confdesc->{"hostpci$_"} = $PVE::QemuServer::PCI::hostpcidesc
    for 0 .. $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES - 1;

# Drive options come pre-keyed from the Drive module.
foreach my $drive_key (keys %$PVE::QemuServer::Drive::drivedesc_hash) {
    $confdesc->{$drive_key} = $PVE::QemuServer::Drive::drivedesc_hash->{$drive_key};
}

$confdesc->{"usb$_"} = $usbdesc for 0 .. $MAX_USB_DEVICES - 1;
1165
# Property-string format of the 'boot' option: the deprecated 'legacy' letter
# list (default key) and the newer explicit 'order' device list.
my $boot_fmt = {
    legacy => {
        type => 'string',
        pattern => '[acdn]{1,4}',
        format_description => "[acdn]{1,4}",
        description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n)."
            . " Deprecated, use 'order=' instead.",
        default_key => 1,
        optional => 1,

        # note: this is also the fallback if boot: is not given at all
        default => 'cdn',
    },
    order => {
        type => 'string',
        format => 'pve-qm-bootdev-list',
        format_description => "device[;device...]",
        optional => 1,
        description => <<EODESC,
The guest will attempt to boot from devices in the order they appear here.

Disks, optical drives and passed-through storage USB devices will be directly
booted from, NICs will load PXE, and PCIe devices will either behave like disks
(e.g. NVMe) or load an option ROM (e.g. RAID controller, hardware NIC).

Note that only devices in this list will be marked as bootable and thus loaded
by the guest firmware (BIOS/UEFI). If you require multiple disks for booting
(e.g. software-raid), you need to specify all of them here.

Overrides the deprecated 'legacy=[acdn]*' value when given.
EODESC
    },
};
PVE::JSONSchema::register_format('pve-qm-boot', $boot_fmt);
1200
PVE::JSONSchema::register_format('pve-qm-bootdev', \&verify_bootdev);
# Format verifier for a single boot device name.
#
# Accepts valid drive names other than efidisk*/tpmstate*, as well as netN,
# usbN and hostpciN keys that exist in $confdesc. Returns $dev on success; on
# failure it returns nothing when $noerr is set and dies otherwise.
sub verify_bootdev {
    my ($dev, $noerr) = @_;

    # efidisk and tpmstate are drive names, but not accepted as boot devices
    my $is_special = ($dev =~ m/^efidisk/ || $dev =~ m/^tpmstate/);
    if (PVE::QemuServer::Drive::is_valid_drivename($dev) && !$is_special) {
        return $dev;
    }

    for my $base (qw(net usb hostpci)) {
        return $dev if $dev =~ m/^$base\d+$/ && $confdesc->{$dev};
    }

    die "invalid boot device '$dev'\n" if !$noerr;
    return;
}
1222
# Serialize a list of boot devices (array ref) into a 'boot' property string
# of the form 'order=dev1;dev2;...'. An empty list yields the empty string.
sub print_bootorder {
    my ($devs) = @_;

    return "" if scalar(@$devs) == 0;

    my $order = join(';', @$devs);
    return PVE::JSONSchema::print_property_string({ order => $order }, $boot_fmt);
}
1229
# Cached result of the KVM_GET_API_VERSION ioctl (0 = not queried yet).
my $kvm_api_version = 0;

# Return the KVM API version reported by /dev/kvm, querying it only once.
# Returns nothing if /dev/kvm cannot be opened.
sub kvm_version {
    if (!$kvm_api_version) {
        my $fh;
        if (!open($fh, '<', '/dev/kvm')) {
            return;
        }

        # 0xae00 => KVM_GET_API_VERSION
        $kvm_api_version = ioctl($fh, 0xae00, 0);
        close($fh);
    }

    return $kvm_api_version;
}
1243
# Per-binary caches: detected version string and the mtime of the binary at
# detection time (used to invalidate the cache when the binary changes).
my $kvm_user_version = {};
my $kvm_mtime = {};

# Return the version of the given QEMU binary as reported by '--version'.
#
# $binary defaults to the command for the host architecture. The result is
# cached per binary and re-detected when the binary's mtime changes. Returns
# 'unknown' when the version line cannot be parsed; dies when the binary
# cannot be stat'ed.
sub kvm_user_version {
    my ($binary) = @_;

    $binary //= get_command_for_arch(get_host_arch()); # get the native arch by default

    # fail with a clear message instead of calling a method on undef below
    my $st = stat($binary) or die "unable to stat QEMU binary '$binary' - $!\n";

    my $cachedmtime = $kvm_mtime->{$binary} // -1;
    return $kvm_user_version->{$binary} if $kvm_user_version->{$binary} &&
        $cachedmtime == $st->mtime;

    $kvm_user_version->{$binary} = 'unknown';
    $kvm_mtime->{$binary} = $st->mtime;

    my $code = sub {
        my $line = shift;
        if ($line =~ m/^QEMU( PC)? emulator version (\d+\.\d+(\.\d+)?)(\.\d+)?[,\s]/) {
            $kvm_user_version->{$binary} = $2;
        }
    };

    # best effort: a failing command only produces a warning
    eval { run_command([$binary, '--version'], outfunc => $code); };
    warn $@ if $@;

    return $kvm_user_version->{$binary};

}
# Wrapper around PVE::QemuServer::Machine::extract_version() that falls back to
# the version of the installed QEMU binary when $version is not given.
my sub extract_version {
    my ($machine_type, $version) = @_;

    $version //= kvm_user_version();

    return PVE::QemuServer::Machine::extract_version($machine_type, $version);
}
1278
# True if /dev/vhost-net exists and is a character device.
sub kernel_has_vhost_net {
    my $vhost_dev = '/dev/vhost-net';
    return -c $vhost_dev;
}
1282
# True if $key is a known VM config option, i.e. has an entry in $confdesc.
sub option_exists {
    my ($option) = @_;

    my $schema = $confdesc->{$option};
    return defined($schema);
}
1287
# Cached result of get_cdrom_path(); '' means "no physical drive found".
my $cdrom_path;

# Return the first of /dev/cdrom, /dev/cdrom1, /dev/cdrom2 that is a symlink.
# If none is, a warning is logged and the empty string is cached and returned.
sub get_cdrom_path {

    return $cdrom_path if defined($cdrom_path);

    for my $suffix ('', '1', '2') {
        my $candidate = "/dev/cdrom$suffix";
        if (-l $candidate) {
            $cdrom_path = $candidate;
            last;
        }
    }

    if (!defined($cdrom_path)) {
        log_warn("no physical CD-ROM available, ignoring");
        $cdrom_path = '';
    }

    return $cdrom_path;
}
1302
# Resolve the 'file' value of a CD-ROM drive to a path:
#   'cdrom'       -> the host's physical drive (see get_cdrom_path)
#   'none'        -> empty string
#   absolute path -> returned unchanged
#   anything else -> treated as volume ID and resolved via PVE::Storage::path
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';
    return '' if $cdrom eq 'none';
    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path($storecfg, $cdrom);
}
1316
# Try to convert old style file names to volume IDs.
#
# 'none', 'cdrom', /dev/... paths and anything already looking like
# 'storage:volume' are returned unchanged. A bare file name is mapped to the
# 'local' storage (iso/ for CD-ROM media, the VM's directory otherwise). Any
# other value containing a '/' yields nothing.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    return $file if $file eq 'none' || $file eq 'cdrom';
    return $file if $file =~ m|^/dev/.+| || $file =~ m/^([^:]+):(.+)$/;

    return if $file =~ m|/|;

    my $is_cdrom = $media && $media eq 'cdrom';
    return $is_cdrom ? "local:iso/$file" : "local:$vmid/$file";
}
1335
# Check that the storage content type $vtype matches what the drive's $media
# setting expects ('disk' -> 'images', 'cdrom' -> 'iso').
#
# Does nothing when $media is not set. Raises a parameter exception keyed by
# $opt on a mismatch and dies with "internal error" for any other media value.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my $expected_for = {
        disk => 'images',
        cdrom => 'iso',
    };
    my $etype = $expected_for->{$media} // die "internal error";

    return if ($vtype eq $etype);

    raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
}
1354
# Normalize $drive->{file} and $drive->{media} in place.
#
# A plain filesystem path (neither 'cdrom'/'none', a /dev/... path, a
# 'storage:volume' ID nor a pure number) is converted to a volume ID via
# PVE::Storage::path_to_volume_id(); a parameter exception keyed by $opt is
# raised if no storage matches. 'media' defaults to 'cdrom' for ISO volumes
# and for the 'cdrom'/'none' keywords.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    # try to convert filesystem paths to volume IDs
    my $is_plain_path =
        ($drive->{file} !~ m/^(cdrom|none)$/)
        && ($drive->{file} !~ m|^/dev/.+|)
        && ($drive->{file} !~ m/^([^:]+):(.+)$/)
        && ($drive->{file} !~ m/^\d+$/);

    if ($is_plain_path) {
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $drive->{file});
        if (!$vtype) {
            raise_param_exc({ $opt => "unable to associate path '$drive->{file}' to any storage"});
        }
        if (!$drive->{media} && $vtype eq 'iso') {
            $drive->{media} = 'cdrom';
        }
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    if (!$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/) {
        $drive->{media} = 'cdrom';
    }
}
1374
# Parse the 'hotplug' option into a hash ref { feature => 1, ... }.
#
# '0' disables everything (empty hash), '1' expands to the schema default.
# Dies on any feature other than network, disk, cpu, memory, usb or cloudinit.
sub parse_hotplug_features {
    my ($data) = @_;

    return {} if $data eq '0';

    $data = $confdesc->{hotplug}->{default} if $data eq '1';

    my $res = {};
    for my $feature (PVE::Tools::split_list($data)) {
        die "invalid hotplug feature '$feature'\n"
            if $feature !~ m/^(network|disk|cpu|memory|usb|cloudinit)$/;
        $res->{$1} = 1;
    }

    return $res;
}
1393
PVE::JSONSchema::register_format('pve-hotplug-features', \&pve_verify_hotplug_features);
# Format verifier for 'pve-hotplug-features'.
#
# Returns $value if parse_hotplug_features() accepts it. Otherwise returns
# nothing when $noerr is set, or dies with the parser's error message.
sub pve_verify_hotplug_features {
    my ($value, $noerr) = @_;

    # parse_hotplug_features() dies on invalid input, so it has to be wrapped
    # for $noerr to have any effect
    my $parsed = eval { parse_hotplug_features($value) };
    my $err = $@;

    return $value if !$err && $parsed;

    return if $noerr;

    die $err || "unable to parse hotplug option\n";
}
1404
# Send a SCSI INQUIRY to the device behind file handle $fh using the Linux SCSI
# generic (sg) ioctl interface.
#
# Returns a hash ref with the keys type, removable, vendor, product and
# revision decoded from the 36 byte response. On any failure it dies, or
# returns nothing when $noerr is set.
sub scsi_inquiry {
    my($fh, $noerr) = @_;

    my $SG_IO = 0x2285;
    my $SG_GET_VERSION_NUM = 0x2282;

    my $versionbuf = "\x00" x 8;
    my $ret = ioctl($fh, $SG_GET_VERSION_NUM, $versionbuf);
    if (!$ret) {
        die "scsi ioctl SG_GET_VERSION_NUM failed - $!\n" if !$noerr;
        return;
    }
    my $version = unpack("I", $versionbuf);
    if ($version < 30000) {
        die "scsi generic interface too old\n" if !$noerr;
        return;
    }

    # $buf receives the response, $sensebuf the sense data; both are handed to
    # the kernel by pointer (pack 'P' below), so they must stay alive and
    # unmodified until the ioctl has returned
    my $buf = "\x00" x 36;
    my $sensebuf = "\x00" x 8;
    my $cmd = pack("C x3 C x1", 0x12, 36); # opcode 0x12 with allocation length 36

    # see /usr/include/scsi/sg.h
    my $sg_io_hdr_t = "i i C C s I P P P I I i P C C C C S S i I I";

    # fields in template order: interface id 'S', transfer direction -3, command
    # and sense buffer lengths, iovec count 0, data length, the three buffer
    # pointers and 6000 for the timeout field; the rest is left unset
    my $packet = pack(
        $sg_io_hdr_t, ord('S'), -3, length($cmd), length($sensebuf), 0, length($buf), $buf, $cmd, $sensebuf, 6000
    );

    $ret = ioctl($fh, $SG_IO, $packet);
    if (!$ret) {
        die "scsi ioctl SG_IO failed - $!\n" if !$noerr;
        return;
    }

    # elements 17 and 18 are the two 'S' fields following the four status bytes
    # (host and driver status per sg.h)
    my @res = unpack($sg_io_hdr_t, $packet);
    if ($res[17] || $res[18]) {
        die "scsi ioctl SG_IO status error - $!\n" if !$noerr;
        return;
    }

    my $res = {};
    $res->@{qw(type removable vendor product revision)} = unpack("C C x6 A8 A16 A4", $buf);

    # only the top bit of the second byte flags removable media, and only the
    # low five bits of the first byte hold the device type
    $res->{removable} = $res->{removable} & 128 ? 1 : 0;
    $res->{type} &= 0x1F;

    return $res;
}
1454
# Open $path read/write and run a SCSI inquiry on it without raising errors.
# Returns the inquiry result hash, or nothing if the path cannot be opened or
# the inquiry fails.
sub path_is_scsi {
    my ($path) = @_;

    my $fh = IO::File->new("+<$path");
    return if !$fh;

    my $inquiry = scsi_inquiry($fh, 1);
    close($fh);

    return $inquiry;
}
1464
# Build the '-device' string for the USB tablet. q35 machines and aarch64
# guests attach it to the ehci bus, everything else to uhci.
sub print_tabletdevice_full {
    my ($conf, $arch) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # we use uhci for old VMs because tablet driver was buggy in older qemu
    my $usbbus = ($q35 || $arch eq 'aarch64') ? 'ehci' : 'uhci';

    return "usb-tablet,id=tablet,bus=$usbbus.0,port=1";
}
1480
# Build the '-device' string for the USB keyboard. Only aarch64 guests get
# one; for every other architecture nothing is returned.
sub print_keyboarddevice_full {
    my ($conf, $arch) = @_;

    if ($arch eq 'aarch64') {
        return "usb-kbd,id=keyboard,bus=ehci.0,port=2";
    }

    return;
}
1488
# Drive identifier as used in device/drive IDs: interface name immediately
# followed by the index, e.g. 'scsi0'.
my sub get_drive_id {
    my ($drive) = @_;

    return $drive->{interface} . $drive->{index};
}
1493
# Build the value of the QEMU '-device' argument for a guest drive.
#
# Parameters:
#   $storecfg     - storage configuration, used to resolve volume IDs to paths
#   $conf         - VM configuration ('scsihw' is read here, the rest is
#                   passed to scsihw_infos())
#   $vmid         - VM ID (not used in this function)
#   $drive        - parsed drive hash (interface, index, file, media, ...)
#   $bridges, $arch, $machine_type - passed through to print_pci_addr()
#
# Returns the device string. Dies for the unimplemented 'usb' interface and
# for unknown interface types.
sub print_drivedevice_full {
    my ($storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type) = @_;

    my $device = '';

    my $drive_id = get_drive_id($drive);
    if ($drive->{interface} eq 'virtio') {
        my $pciaddr = print_pci_addr("$drive_id", $bridges, $arch, $machine_type);
        $device = "virtio-blk-pci,drive=drive-$drive_id,id=${drive_id}${pciaddr}";
        $device .= ",iothread=iothread-$drive_id" if $drive->{iothread};
    } elsif ($drive->{interface} eq 'scsi') {

        my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = 'hd';
        my $path = '';
        if (drive_is_cdrom($drive)) {
            $devicetype = 'cd';
        } else {
            if ($drive->{file} =~ m|^/|) {
                # directly passed host path: probe it to pick the device type
                $path = $drive->{file};
                if (my $info = path_is_scsi($path)) {
                    if ($info->{type} == 0 && $drive->{scsiblock}) {
                        $devicetype = 'block';
                    } elsif ($info->{type} == 1) { # tape
                        $devicetype = 'generic';
                    }
                }
            } else {
                $path = PVE::Storage::path($storecfg, $drive->{file});
            }

            # for compatibility only, we prefer scsi-hd (#2408, #2355, #2380)
            my $version = extract_version($machine_type, kvm_user_version());
            if ($path =~ m/^iscsi\:\/\// &&
                !min_version($version, 4, 1)) {
                $devicetype = 'generic';
            }
        }

        # default, lsi* and pvscsi controllers address drives by scsi-id, all
        # other controller types by LUN on a fixed channel/id
        if (!$conf->{scsihw} || $conf->{scsihw} =~ m/^lsi/ || $conf->{scsihw} eq 'pvscsi') {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,scsi-id=$unit";
        } else {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,channel=0,scsi-id=0"
                .",lun=$drive->{index}";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($drive->{ssd} && ($devicetype eq 'block' || $devicetype eq 'hd')) {
            $device .= ",rotation_rate=1";
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};

    } elsif ($drive->{interface} eq 'ide' || $drive->{interface} eq 'sata') {
        my $maxdev = ($drive->{interface} eq 'sata') ? $PVE::QemuServer::Drive::MAX_SATA_DISKS : 2;
        my $controller = int($drive->{index} / $maxdev);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = ($drive->{media} && $drive->{media} eq 'cdrom') ? "cd" : "hd";

        $device = "ide-$devicetype";
        if ($drive->{interface} eq 'ide') {
            $device .= ",bus=ide.$controller,unit=$unit";
        } else {
            $device .= ",bus=ahci$controller.$unit";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($devicetype eq 'hd') {
            if (my $model = $drive->{model}) {
                $model = URI::Escape::uri_unescape($model);
                $device .= ",model=$model";
            }
            if ($drive->{ssd}) {
                $device .= ",rotation_rate=1";
            }
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};
    } elsif ($drive->{interface} eq 'usb') {
        die "implement me";
        # -device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0
    } else {
        die "unsupported interface type";
    }

    $device .= ",bootindex=$drive->{bootindex}" if $drive->{bootindex};

    if (my $serial = $drive->{serial}) {
        $serial = URI::Escape::uri_unescape($serial);
        $device .= ",serial=$serial";
    }

    return $device;
}
1589
# Read the iSCSI initiator name from /etc/iscsi/initiatorname.iscsi.
# Returns the value of the first 'InitiatorName=' line, or nothing if the file
# cannot be opened or contains no such line.
sub get_initiator_name {
    my $fh = IO::File->new('/etc/iscsi/initiatorname.iscsi') || return;

    my $initiator;
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\s*InitiatorName\s*=\s*([\.\-:\w]+)/) {
            $initiator = $1;
            last;
        }
    }
    $fh->close();

    return $initiator;
}
1603
# Build the value of the QEMU '-drive' argument for $drive.
#
# Parameters:
#   $storecfg - storage configuration, used to resolve volume IDs
#   $vmid     - VM ID, passed on to get_iso_path()
#   $drive    - parsed drive hash
#   $pbs_name - if set, name of a PBS block node used as backing for the drive
#   $io_uring - whether io_uring may be chosen as the default aio mode
#
# Returns the option string. Dies if a CD-ROM drive is combined with $pbs_name
# or if no format can be determined for a PBS backed drive.
sub print_drive_commandline_full {
    my ($storecfg, $vmid, $drive, $pbs_name, $io_uring) = @_;

    my $path;
    my $volid = $drive->{file};
    my $format = $drive->{format};
    my $drive_id = get_drive_id($drive);

    # $storeid stays undefined when $volid is not a 'storage:volume' ID
    my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
    my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;

    if (drive_is_cdrom($drive)) {
        $path = get_iso_path($storecfg, $vmid, $volid);
        die "$drive_id: cannot back cdrom drive with PBS snapshot\n" if $pbs_name;
    } else {
        if ($storeid) {
            $path = PVE::Storage::path($storecfg, $volid);
            $format //= qemu_img_format($scfg, $volname);
        } else {
            $path = $volid;
            $format //= "raw";
        }
    }

    my $is_rbd = $path =~ m/^rbd:/;

    # options passed through unchanged under the same name
    my $opts = '';
    my @qemu_drive_options = qw(heads secs cyls trans media cache rerror werror aio discard);
    foreach my $o (@qemu_drive_options) {
        $opts .= ",$o=$drive->{$o}" if defined($drive->{$o});
    }

    # snapshot only accepts on|off
    if (defined($drive->{snapshot})) {
        my $v = $drive->{snapshot} ? 'on' : 'off';
        $opts .= ",snapshot=$v";
    }

    if (defined($drive->{ro})) { # ro maps to QEMUs `readonly`, which accepts `on` or `off` only
        $opts .= ",readonly=" . ($drive->{ro} ? 'on' : 'off');
    }

    # map the total/read/write limits to QEMU's throttling.* options; the mbps
    # values are converted to bytes per second
    foreach my $type (['', '-total'], [_rd => '-read'], [_wr => '-write']) {
        my ($dir, $qmpname) = @$type;
        if (my $v = $drive->{"mbps$dir"}) {
            $opts .= ",throttling.bps$qmpname=".int($v*1024*1024);
        }
        if (my $v = $drive->{"mbps${dir}_max"}) {
            $opts .= ",throttling.bps$qmpname-max=".int($v*1024*1024);
        }
        if (my $v = $drive->{"bps${dir}_max_length"}) {
            $opts .= ",throttling.bps$qmpname-max-length=$v";
        }
        if (my $v = $drive->{"iops${dir}"}) {
            $opts .= ",throttling.iops$qmpname=$v";
        }
        if (my $v = $drive->{"iops${dir}_max"}) {
            $opts .= ",throttling.iops$qmpname-max=$v";
        }
        if (my $v = $drive->{"iops${dir}_max_length"}) {
            $opts .= ",throttling.iops$qmpname-max-length=$v";
        }
    }

    # with a PBS backing node the drive itself uses the alloc-track format and
    # the real format moves to the 'file' child
    if ($pbs_name) {
        $format = "rbd" if $is_rbd;
        die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n"
            if !$format;
        $opts .= ",format=alloc-track,file.driver=$format";
    } elsif ($format) {
        $opts .= ",format=$format";
    }

    # whether the effective cache mode is one of off/none/directsync
    my $cache_direct = 0;

    if (my $cache = $drive->{cache}) {
        $cache_direct = $cache =~ /^(?:off|none|directsync)$/;
    } elsif (!drive_is_cdrom($drive) && !($scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow})) {
        # no explicit cache mode: default to cache=none, except for CD-ROMs and
        # btrfs storages without 'nocow'
        $opts .= ",cache=none";
        $cache_direct = 1;
    }

    # io_uring with cache mode writeback or writethrough on krbd will hang...
    my $rbd_no_io_uring = $scfg && $scfg->{type} eq 'rbd' && $scfg->{krbd} && !$cache_direct;

    # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
    # sometimes, just plain disable...
    my $lvm_no_io_uring = $scfg && $scfg->{type} eq 'lvm';

    # io_uring causes problems when used with CIFS since kernel 5.15
    # Some discussion: https://www.spinics.net/lists/linux-cifs/msg26734.html
    my $cifs_no_io_uring = $scfg && $scfg->{type} eq 'cifs';

    # only pick a default aio mode if none was configured (a configured one
    # was already added with the pass-through options above)
    if (!$drive->{aio}) {
        if ($io_uring && !$rbd_no_io_uring && !$lvm_no_io_uring && !$cifs_no_io_uring) {
            # io_uring supports all cache modes
            $opts .= ",aio=io_uring";
        } else {
            # aio native works only with O_DIRECT
            if($cache_direct) {
                $opts .= ",aio=native";
            } else {
                $opts .= ",aio=threads";
            }
        }
    }

    if (!drive_is_cdrom($drive)) {
        my $detectzeroes;
        if (defined($drive->{detect_zeroes}) && !$drive->{detect_zeroes}) {
            $detectzeroes = 'off';
        } elsif ($drive->{discard}) {
            $detectzeroes = $drive->{discard} eq 'on' ? 'unmap' : 'on';
        } else {
            # This used to be our default with discard not being specified:
            $detectzeroes = 'on';
        }

        # note: 'detect-zeroes' works per blockdev and we want it to persist
        # after the alloc-track is removed, so put it on 'file' directly
        my $dz_param = $pbs_name ? "file.detect-zeroes" : "detect-zeroes";
        $opts .= ",$dz_param=$detectzeroes" if $detectzeroes;
    }

    if ($pbs_name) {
        $opts .= ",backing=$pbs_name";
        $opts .= ",auto-remove=on";
    }

    # my $file_param = $pbs_name ? "file.file.filename" : "file";
    my $file_param = "file";
    if ($pbs_name) {
        # non-rbd drivers require the underlying file to be a seperate block
        # node, so add a second .file indirection
        $file_param .= ".file" if !$is_rbd;
        $file_param .= ".filename";
    }
    # an empty path (e.g. CD-ROM set to 'none') produces no file parameter
    my $pathinfo = $path ? "$file_param=$path," : '';

    return "${pathinfo}if=none,id=drive-$drive->{interface}$drive->{index}$opts";
}
1745
# Build the '-blockdev' option string for a read-only PBS block node named
# $pbs_name. 'namespace' and 'keyfile' are only emitted when set in $pbs_conf.
sub print_pbs_blockdev {
    my ($pbs_conf, $pbs_name) = @_;

    my @parts = ("driver=pbs", "node-name=$pbs_name", "read-only=on");

    push @parts, "repository=$pbs_conf->{repository}";
    push @parts, "namespace=$pbs_conf->{namespace}" if $pbs_conf->{namespace};
    push @parts, "snapshot=$pbs_conf->{snapshot}";
    push @parts, "archive=$pbs_conf->{archive}";
    push @parts, "keyfile=$pbs_conf->{keyfile}" if $pbs_conf->{keyfile};

    return join(',', @parts);
}
1756
# Build the value of the QEMU '-device' argument for the NIC described by $net.
#
# The 'virtio' model maps to 'virtio-net-pci'; every other model name is used
# as the device name directly. Dies if a configured MTU is below 576 or above
# the bridge MTU (an MTU of 1 selects the bridge MTU).
sub print_netdevice_full {
    my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version) = @_;

    my $is_virtio = $net->{model} eq 'virtio';
    my $device = $is_virtio ? 'virtio-net-pci' : $net->{model};

    my $pciaddr = print_pci_addr("$netid", $bridges, $arch, $machine_type);
    my $tmpstr = "$device,mac=$net->{macaddr},netdev=$netid$pciaddr,id=$netid";

    if ($net->{queues} && $net->{queues} > 1 && $is_virtio) {
        # Consider we have N queues, the number of vectors needed is 2 * N + 2, i.e., one per in
        # and out of each queue plus one config interrupt and control vector queue
        my $vectors = $net->{queues} * 2 + 2;
        $tmpstr .= ",vectors=$vectors,mq=on";
        $tmpstr .= ",packed=on" if min_version($machine_version, 7, 1);
    }

    if (min_version($machine_version, 7, 1) && $is_virtio) {
        $tmpstr .= ",rx_queue_size=1024,tx_queue_size=1024";
    }

    if ($net->{bootindex}) {
        $tmpstr .= ",bootindex=$net->{bootindex}";
    }

    if (my $mtu = $net->{mtu}) {
        if ($is_virtio && $net->{bridge}) {
            my $bridge_mtu = PVE::Network::read_bridge_mtu($net->{bridge});
            if ($mtu != 1) {
                die "netdev $netid: MTU '$mtu' is smaller than the IP minimum MTU '576'\n"
                    if $mtu < 576;
                die "netdev $netid: MTU '$mtu' is bigger than the bridge MTU '$bridge_mtu'\n"
                    if $mtu > $bridge_mtu;
            } else {
                # '1' means: inherit the MTU of the bridge
                $mtu = $bridge_mtu;
            }
            $tmpstr .= ",host_mtu=$mtu";
        } else {
            warn "WARN: netdev $netid: ignoring MTU '$mtu', not using VirtIO or no bridge configured.\n";
        }
    }

    if ($use_old_bios_files) {
        # PXE ROM file per device name; devices not listed get no romfile
        my $romfile_for = {
            'virtio-net-pci' => 'pxe-virtio.rom',
            'e1000' => 'pxe-e1000.rom',
            'e1000e' => 'pxe-e1000e.rom',
            'ne2k' => 'pxe-ne2k_pci.rom',
            'pcnet' => 'pxe-pcnet.rom',
            'rtl8139' => 'pxe-rtl8139.rom',
        };
        my $romfile = $romfile_for->{$device};
        $tmpstr .= ",romfile=$romfile" if $romfile;
    }

    return $tmpstr;
}
1819
# Build the value of the QEMU '-netdev' argument for network device $netid.
#
# A NIC with a bridge becomes a tap device named 'tap<vmid>i<N>' using the
# pve-bridge scripts (the hotplug variant if $hotplug is set); one without a
# bridge uses user-mode networking. Dies if $netid is not of the form 'netN'
# with N below $MAX_NETS, or if the tap interface name would be too long.
sub print_netdev_full {
    my ($vmid, $conf, $arch, $net, $netid, $hotplug) = @_;

    # reject IDs that do not match at all, not just out-of-range indices;
    # previously a non-matching ID left the index empty and slipped through
    my $i;
    if ($netid =~ m/^net(\d+)$/) {
        $i = int($1);
    }

    die "got strange net id '$netid'\n" if !defined($i) || $i >= $MAX_NETS;

    my $ifname = "tap${vmid}i$i";

    # kvm uses TUNSETIFF ioctl, and that limits ifname length
    die "interface name '$ifname' is too long (max 15 character)\n"
        if length($ifname) >= 16;

    # vhost is only enabled for virtio NICs on the native architecture
    my $vhostparam = '';
    if (is_native($arch)) {
        $vhostparam = ',vhost=on' if kernel_has_vhost_net() && $net->{model} eq 'virtio';
    }

    my $vmname = $conf->{name} || "vm$vmid";

    my $netdev = "";
    my $script = $hotplug ? "pve-bridge-hotplug" : "pve-bridge";

    if ($net->{bridge}) {
        $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/$script"
            .",downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam";
    } else {
        $netdev = "type=user,id=$netid,hostname=$vmname";
    }

    $netdev .= ",queues=$net->{queues}" if ($net->{queues} && $net->{model} eq 'virtio');

    return $netdev;
}
1857
# Maps the configured display type to the QEMU device name used for it.
my $vga_map = {
    cirrus => 'cirrus-vga',
    std => 'VGA',
    vmware => 'vmware-svga',
    virtio => 'virtio-vga',
    'virtio-gl' => 'virtio-vga-gl',
};
1865
# Build the value of the QEMU '-device' argument for a display adapter.
#
# Parameters:
#   $conf            - VM configuration (ostype, bios and machine type are read)
#   $vga             - parsed 'vga' property (type, memory)
#   $arch            - guest architecture
#   $machine_version - machine version, checked with min_version()
#   $machine         - machine type, passed to the PCI address helpers
#   $id              - display index; appended to the device ID when defined
#   $qxlnum          - if true, a qxl device is generated instead of the mapped type
#   $bridges         - passed to the PCI address helpers
#
# Dies if no device type is known for $vga->{type}, or if 'virtio-gl' is
# requested but the GL libraries or a DRM render node are missing.
sub print_vga_device {
    my ($conf, $vga, $arch, $machine_version, $machine, $id, $qxlnum, $bridges) = @_;

    my $type = $vga_map->{$vga->{type}};
    if ($arch eq 'aarch64' && defined($type) && $type eq 'virtio-vga') {
        $type = 'virtio-gpu';
    }
    my $vgamem_mb = $vga->{memory};

    my $max_outputs = '';
    if ($qxlnum) {
        $type = $id ? 'qxl' : 'qxl-vga';

        # both anchors must apply to both alternatives, so group them together
        if (!$conf->{ostype} || $conf->{ostype} =~ m/^(?:l\d\d|other)$/) {
            # set max outputs so linux can have up to 4 qxl displays with one device
            if (min_version($machine_version, 4, 1)) {
                $max_outputs = ",max_outputs=4";
            }
        }
    }

    die "no devicetype for $vga->{type}\n" if !$type;

    my $memory = "";
    if ($vgamem_mb) {
        if ($vga->{type} =~ /^virtio/) {
            my $bytes = PVE::Tools::convert_size($vgamem_mb, "mb" => "b");
            $memory = ",max_hostmem=$bytes";
        } elsif ($qxlnum) {
            # from https://www.spice-space.org/multiple-monitors.html
            $memory = ",vgamem_mb=$vga->{memory}";
            my $ram = $vgamem_mb * 4;
            my $vram = $vgamem_mb * 2;
            $memory .= ",ram_size_mb=$ram,vram_size_mb=$vram";
        } else {
            $memory = ",vgamem_mb=$vga->{memory}";
        }
    } elsif ($qxlnum && $id) {
        # fixed sizes for additional qxl devices without configured memory
        $memory = ",ram_size=67108864,vram_size=33554432";
    }

    # EDID is switched off for the std VGA on Windows guests unless OVMF is used
    my $edidoff = "";
    if ($type eq 'VGA' && windows_version($conf->{ostype})) {
        $edidoff=",edid=off" if (!defined($conf->{bios}) || $conf->{bios} ne 'ovmf');
    }

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $vgaid = "vga" . ($id // '');
    my $pciaddr;
    if ($q35 && $vgaid eq 'vga') {
        # the first display uses pcie.0 bus on q35 machines
        $pciaddr = print_pcie_addr($vgaid, $bridges, $arch, $machine);
    } else {
        $pciaddr = print_pci_addr($vgaid, $bridges, $arch, $machine);
    }

    if ($vga->{type} eq 'virtio-gl') {
        my $base = '/usr/lib/x86_64-linux-gnu/lib';
        die "missing libraries for '$vga->{type}' detected! Please install 'libgl1' and 'libegl1'\n"
            if !-e "${base}EGL.so.1" || !-e "${base}GL.so.1";

        die "no DRM render node detected (/dev/dri/renderD*), no GPU? - needed for '$vga->{type}' display\n"
            if !PVE::Tools::dir_glob_regex('/dev/dri/', "renderD.*");
    }

    return "$type,id=${vgaid}${memory}${max_outputs}${pciaddr}${edidoff}";
}
1933
# Parse a ';'-separated list of numbers and 'from-to' ranges.
#
# Returns an array ref of [ $start, $end ] pairs; $end is undef for a single
# number. Dies on malformed entries and on ranges whose end is below the start.
sub parse_number_sets {
    my ($set) = @_;

    my @ranges;
    for my $part (split(/;/, $set)) {
        my ($start, $end) = $part =~ /^\s*(\d+)(?:-(\d+))?\s*$/
            or die "invalid range: $part\n";

        die "invalid range: $part ($end < $start)\n" if defined($end) && $end < $start;

        push @ranges, [ $start, $end ];
    }

    return \@ranges;
}
1947
# Parse a 'numaN' property string. The 'cpus' and 'hostnodes' values, when
# present, are additionally expanded into range lists by parse_number_sets().
sub parse_numa {
    my ($data) = @_;

    my $res = parse_property_string($numa_fmt, $data);

    for my $key (qw(cpus hostnodes)) {
        $res->{$key} = parse_number_sets($res->{$key}) if defined($res->{$key});
    }

    return $res;
}
1956
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
#
# Parse a 'netN' property string. A parse error is only warned about and
# yields nothing. If no MAC address is set, a random one is generated using the
# datacenter-wide 'mac_prefix', unless $disable_mac_autogen is set.
sub parse_net {
    my ($data, $disable_mac_autogen) = @_;

    my $res = eval { parse_property_string($net_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    return $res if defined($res->{macaddr}) || $disable_mac_autogen;

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
    $res->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix});

    return $res;
}
1972
# ipconfigX ip=cidr,gw=ip,ip6=cidr,gw6=ip
#
# Parse an 'ipconfigN' property string. Parse errors and inconsistent
# address/gateway combinations are warned about and yield nothing. If neither
# an IPv4 nor an IPv6 address is given, DHCP is returned for both.
sub parse_ipconfig {
    my ($data) = @_;

    my $res = eval { parse_property_string($ipconfig_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    my ($ip, $gw, $ip6, $gw6) = $res->@{qw(ip gw ip6 gw6)};

    my $problem;
    if ($gw && !$ip) {
        $problem = 'gateway specified without specifying an IP address';
    } elsif ($gw6 && !$ip6) {
        $problem = 'IPv6 gateway specified without specifying an IPv6 address';
    } elsif ($gw && $ip eq 'dhcp') {
        $problem = 'gateway specified together with DHCP';
    } elsif ($gw6 && $ip6 !~ /^$IPV6RE/) {
        # gw6 + auto/dhcp
        $problem = "IPv6 gateway specified together with $ip6 address";
    }

    if (defined($problem)) {
        warn $problem;
        return;
    }

    return { ip => 'dhcp', ip6 => 'dhcp' } if !$ip && !$ip6;

    return $res;
}
2007
# Serialize a parsed network device hash back into a 'netN' property string.
sub print_net {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $net_fmt);
}
2013
# Round-trip every 'netN' entry of $settings through parse_net() and print_net(), modifying
# $settings in place. As parse_net() fills in a missing MAC address, the re-printed value
# contains one afterwards. Entries that parse_net() rejects are left untouched.
sub add_random_macs {
    my ($settings) = @_;

    for my $netid (grep { m/^net(\d+)$/ } keys %$settings) {
        my $parsed = parse_net($settings->{$netid}) or next;
        $settings->{$netid} = print_net($parsed);
    }
}
2024
# Return 1 if the owner reported by PVE::Storage::path() for $volid equals $vmid, nothing
# otherwise.
sub vm_is_volid_owner {
    my ($storecfg, $vmid, $volid) = @_;

    # values starting with '/' are never looked up in the storage layer
    return if $volid =~ m|^/|;

    # path() returns (path, owner); errors are swallowed and count as "not the owner"
    my $owner = eval { (PVE::Storage::path($storecfg, $volid))[1] };

    return 1 if $owner && $owner == $vmid;

    return;
}
2038
# Handle a drive that is no longer used by the config $conf of VM $vmid:
# - a cloudinit drive is freed right away (a failure only results in a warning) and the
#   'cloudinit' entry is dropped from $conf
# - any other non-CD-ROM drive is registered as unused volume, but only if
#   vm_is_volid_owner() confirms that the VM owns it
sub vmconfig_register_unused_drive {
    my ($storecfg, $vmid, $conf, $drive) = @_;

    if (drive_is_cloudinit($drive)) {
        eval { PVE::Storage::vdisk_free($storecfg, $drive->{file}) };
        if (my $err = $@) {
            warn $err;
        }
        delete $conf->{cloudinit};
    } elsif (!drive_is_cdrom($drive)) {
        my $volid = $drive->{file};
        PVE::QemuConfig->add_unused_volume($conf, $volid, $vmid)
            if vm_is_volid_owner($storecfg, $vmid, $volid);
    }
}
2053
# smbios: [manufacturer=str][,product=str][,version=str][,serial=str][,uuid=uuid][,sku=str][,family=str][,base64=bool]
my $smbios1_fmt = do {
    # all free-text fields share one schema and only differ in their description
    my $text_field = sub {
        my ($description) = @_;
        return {
            type => 'string',
            pattern => '[A-Za-z0-9+\/]+={0,2}',
            format_description => 'Base64 encoded string',
            description => $description,
            optional => 1,
        };
    };

    my $fmt = {
        uuid => {
            type => 'string',
            pattern => '[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}',
            format_description => 'UUID',
            description => "Set SMBIOS1 UUID.",
            optional => 1,
        },
        version => $text_field->("Set SMBIOS1 version."),
        serial => $text_field->("Set SMBIOS1 serial number."),
        manufacturer => $text_field->("Set SMBIOS1 manufacturer."),
        product => $text_field->("Set SMBIOS1 product ID."),
        sku => $text_field->("Set SMBIOS1 SKU string."),
        family => $text_field->("Set SMBIOS1 family string."),
        base64 => {
            type => 'boolean',
            description => 'Flag to indicate that the SMBIOS values are base64 encoded',
            optional => 1,
        },
    };

    $fmt;
};
2111
# Parse an SMBIOS type 1 property string according to $smbios1_fmt.
# A parse error is only warned about; the return value is undefined in that case.
sub parse_smbios1 {
    my ($data) = @_;

    my $smbios1 = eval { parse_property_string($smbios1_fmt, $data) };
    if (my $err = $@) {
        warn $err;
    }

    return $smbios1;
}
2119
# Turn a parsed SMBIOS type 1 hash back into a property string, using $smbios1_fmt.
sub print_smbios1 {
    my ($smbios1) = @_;

    return PVE::JSONSchema::print_property_string($smbios1, $smbios1_fmt);
}

# register the format with the JSON schema under the name 'pve-qm-smbios1'
PVE::JSONSchema::register_format('pve-qm-smbios1', $smbios1_fmt);
2126
# Parse a watchdog property string according to $watchdog_fmt.
# Returns nothing for a false $value. A parse error is only warned about; the return value
# is undefined in that case.
sub parse_watchdog {
    my ($value) = @_;

    return unless $value;

    my $watchdog = eval { parse_property_string($watchdog_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $watchdog;
}
2136
# Parse the 'agent' option of $conf according to $agent_fmt.
#
# Always returns a hash reference: an empty one if the option is not set, cannot be parsed
# (a warning is emitted then) or does not have 'enabled' set, the parsed properties otherwise.
sub parse_guest_agent {
    my ($conf) = @_;

    my $agent_option = $conf->{agent};
    return {} if !defined($agent_option);

    my $agent = eval { parse_property_string($agent_fmt, $agent_option) };
    if (my $err = $@) {
        warn $err;
    }

    # if the agent is disabled ignore the other potentially set properties
    return $agent->{enabled} ? $agent : {};
}
2149
# Return the value of property $key from the guest agent settings of $conf, as parsed by
# parse_guest_agent(). Returns undef if the 'agent' option is not set at all.
sub get_qga_key {
    my ($conf, $key) = @_;

    # an explicit undef is returned here (not an empty list), so that the result is a single
    # value in list context as well
    return undef if !defined($conf->{agent});

    return parse_guest_agent($conf)->{$key};
}
2157
# Parse a VGA property string according to $vga_fmt.
# Returns an empty hash reference for a false $value. A parse error is only warned about; the
# return value is undefined in that case.
sub parse_vga {
    my ($value) = @_;

    return {} unless $value;

    my $vga = eval { parse_property_string($vga_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $vga;
}
2166
# Parse an RNG property string according to $rng_fmt.
# Returns nothing for a false $value. A parse error is only warned about; the return value
# is undefined in that case.
sub parse_rng {
    my ($value) = @_;

    return unless $value;

    my $rng = eval { parse_property_string($rng_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $rng;
}
2176
# Parse a 'meta' property string according to $meta_info_fmt.
# Returns nothing for a false $value. A parse error is only warned about; the return value
# is undefined in that case.
sub parse_meta_info {
    my ($value) = @_;

    return unless $value;

    my $meta = eval { parse_property_string($meta_info_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $meta;
}
2186
# Build a fresh 'meta' property string: 'creation-qemu' is set to the result of
# kvm_user_version() and 'ctime' to the current epoch time (as string).
sub new_meta_info_string {
    my () = @_; # for now do not allow to override any value

    my $meta = {
        'creation-qemu' => kvm_user_version(),
        ctime => "" . int(time()),
    };

    return PVE::JSONSchema::print_property_string($meta, $meta_info_fmt);
}
2198
# Return extra QEMU command line arguments (array reference) needed to keep the virtual
# hardware stable for VMs that always use the latest machine version but were created before a
# machine version transition that affected the hardware - or nothing if no fixup applies.
#
# We do not want to pin the machine version for non-windows OS types, so such transitions are
# handled here instead.
#
# $conf         - VM config
# $forcemachine - forced machine type, its version is extracted and honored if there is one
# $kvmver       - version of the QEMU binary
sub qemu_created_version_fixups {
    my ($conf, $forcemachine, $kvmver) = @_;

    my $meta = parse_meta_info($conf->{meta}) // {};
    my $forced_vers = PVE::QemuServer::Machine::extract_version($forcemachine);

    # only non-versioned machine types are affected
    my $machine = $conf->{machine};
    return if defined($machine) && $machine !~ m/^(?:pc|q35|virt)$/;

    # only VMs created before 6.1 (or without creation info) are affected
    my $created_with = $meta->{'creation-qemu'};
    return if defined($created_with) && min_version($created_with, 6, 1);

    # handle snapshot-rollback/migrations
    return if $forced_vers && !min_version($forced_vers, 6, 1);

    # only need to apply the change since 6.1
    return if !min_version($kvmver, 6, 1);

    return if !PVE::QemuServer::Machine::machine_type_is_q35($conf);
    return if !$conf->{ostype} || $conf->{ostype} ne 'l26';

    # this changed to default-on in Q 6.1 for q35 machines, it will mess with PCI slot view
    # and thus with the predictable interface naming of systemd
    return ['-global', 'ICH9-LPC.acpi-pci-hotplug-with-bridge-support=off'];
}
2223
PVE::JSONSchema::register_format('pve-qm-usb-device', \&verify_usb_device);

# Format verifier for 'pve-qm-usb-device': returns $value if parse_usb_device() accepts it.
# Otherwise it dies, or returns nothing if $noerr is set.
sub verify_usb_device {
    my ($value, $noerr) = @_;

    if (parse_usb_device($value)) {
        return $value;
    }

    die "unable to parse usb device\n" if !$noerr;

    return;
}
2234
# add JSON properties for create and set function
#
# Copies the schema of every option in $confdesc into $prop (and returns $prop), except for a
# fixed set of options that are skipped. With $with_disk_alloc set, drive options get their
# schema from $PVE::QemuServer::Drive::drivedesc_hash_with_alloc instead.
sub json_config_properties {
    my ($prop, $with_disk_alloc) = @_;

    my %skip = map { $_ => 1 } qw(parent snaptime vmstate runningmachine runningcpu meta);

    for my $opt (grep { !$skip{$_} } keys %$confdesc) {
        my $use_alloc_schema = $with_disk_alloc && is_valid_drivename($opt);

        $prop->{$opt} = $use_alloc_schema
            ? $PVE::QemuServer::Drive::drivedesc_hash_with_alloc->{$opt}
            : $confdesc->{$opt};
    }

    return $prop;
}
2260
# Properties that we can read from an OVF file
#
# Returns a hash reference with one optional disk image property per valid drive name, plus
# the optional 'cores', 'memory' and 'name' properties.
sub json_ovf_properties {
    my %disk_props = map {
        (
            $_ => {
                type => 'string',
                format => 'pve-volume-id-or-absolute-path',
                description => "Disk image that gets imported to $_",
                optional => 1,
            },
        )
    } PVE::QemuServer::Drive::valid_drive_names();

    my $prop = {
        %disk_props,
        cores => {
            type => 'integer',
            description => "The number of CPU cores.",
            optional => 1,
        },
        memory => {
            type => 'integer',
            description => "Amount of RAM for the VM in MB.",
            optional => 1,
        },
        name => {
            type => 'string',
            description => "Name of the VM.",
            optional => 1,
        },
    };

    return $prop;
}
2292
# return copy of $confdesc_cloudinit to generate documentation
# (a deep copy, so that callers cannot modify the original description)
sub cloudinit_config_properties {
    my $copy = dclone($confdesc_cloudinit);

    return $copy;
}
2298
# Return a set (hash reference mapping property name to 1) consisting of all options described
# in $confdesc_cloudinit, plus 'name' and 'net0' .. 'net<$MAX_NETS-1>'.
sub cloudinit_pending_properties {
    my $p = {
        # the parentheses are required: without them map would also consume the following
        # "name => 1" as list elements and produce a bogus '1' entry
        (map { $_ => 1 } keys $confdesc_cloudinit->%*),
        name => 1,
    };
    $p->{"net$_"} = 1 for 0..($MAX_NETS-1);
    return $p;
}
2307
# Check $value against the type that $confdesc declares for the option $key and return the
# normalized value:
#   boolean - 1 or 0 (accepts 1/on/yes/true and 0/off/no/false, words case-insensitively)
#   integer - the numeric value (digits only)
#   number  - the value as is (digits with an optional fractional part)
#   string  - the value as is after checking its format, if one is declared; otherwise the
#             value with one pair of enclosing double quotes removed
#
# Dies for unknown options, undefined values, values containing line breaks and values that
# fail the type check.
sub check_type {
    my ($key, $value) = @_;

    my $desc = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$desc;

    my $type = $desc->{type};

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if $value eq '1' || $value =~ m/^(on|yes|true)$/i;
        return 0 if $value eq '0' || $value =~ m/^(off|no|false)$/i;
        die "type check ('boolean') failed - got '$value'\n";
    } elsif ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    } elsif ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    } elsif ($type eq 'string') {
        if (my $fmt = $desc->{format}) {
            PVE::JSONSchema::check_format($fmt, $value);
        } else {
            $value =~ s/^\"(.*)\"$/$1/;
        }
        return $value;
    }

    die "internal error"
}
2344
# Destroy the VM $vmid: free the volumes owned by it and remove (or replace) its config.
#
# $storecfg           - storage configuration, passed on to the PVE::Storage calls
# $vmid               - ID of the VM
# $skiplock           - if set, the config lock is not checked
# $replacement_conf   - if defined, it is written as new config instead of destroying the config
# $purge_unreferenced - if set, all 'images' volumes listed for $vmid are freed as well, even
#                       if the config does not reference them
#
# Dies if the lock check fails or if a base volume of a template is still used by a linked
# clone. Failing to free a single volume only results in a warning.
sub destroy_vm {
    my ($storecfg, $vmid, $skiplock, $replacement_conf, $purge_unreferenced) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    # no lock check if requested by the caller or if has_lock() reports the 'suspended' lock
    if (!$skiplock && !PVE::QemuConfig->has_lock($conf, 'suspended')) {
        PVE::QemuConfig->check_lock($conf);
    }

    if ($conf->{template}) {
        # check if any base image is still used by a linked clone
        PVE::QemuConfig->foreach_volume_full($conf, { include_unused => 1 }, sub {
            my ($ds, $drive) = @_;
            return if drive_is_cdrom($drive);

            my $volid = $drive->{file};
            return if !$volid || $volid =~ m|^/|;

            die "base volume '$volid' is still in use by a linked clone\n"
                if PVE::Storage::volume_is_base_and_used($storecfg, $volid);
        });
    }

    # volumes already handled, so that each one is freed at most once
    my $volids = {};
    my $remove_owned_drive = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;
        return if $volids->{$volid};

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        $volids->{$volid} = 1;
        eval { PVE::Storage::vdisk_free($storecfg, $volid) };
        warn "Could not remove disk '$volid', check manually: $@" if $@;
    };

    # only remove disks owned by this VM (referenced in the config)
    my $include_opts = {
        include_unused => 1,
        extra_keys => ['vmstate'],
    };
    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $remove_owned_drive);

    # state volumes referenced by snapshots
    for my $snap (values %{$conf->{snapshots}}) {
        next if !defined($snap->{vmstate});
        my $drive = PVE::QemuConfig->parse_volume('vmstate', $snap->{vmstate}, 1);
        next if !defined($drive);
        $remove_owned_drive->('vmstate', $drive);
    }

    PVE::QemuConfig->foreach_volume_full($conf->{pending}, $include_opts, $remove_owned_drive);

    if ($purge_unreferenced) { # also remove unreferenced disk
        my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid, undef, 'images');
        PVE::Storage::foreach_volid($vmdisks, sub {
            my ($volid) = @_;
            eval { PVE::Storage::vdisk_free($storecfg, $volid) };
            warn $@ if $@;
        });
    }

    if (defined $replacement_conf) {
        PVE::QemuConfig->write_config($vmid, $replacement_conf);
    } else {
        PVE::QemuConfig->destroy_config($vmid);
    }
}
2417
# Parse the raw content of a VM config file into a config hash.
#
# $filename - path of the config file; it has to end in '/qemu-server/<vmid>.conf', the VMID
#             is taken from it
# $raw      - content of the file; nothing is returned if it is undefined
# $strict   - if set, parse errors are fatal, otherwise they are only warned about
#
# The returned hash holds the options of the current config on its top level, plus:
#   digest    - SHA1 hex digest of $raw
#   pending   - options of the [PENDING] section
#   cloudinit - options of the [special:cloudinit] section (stored without validation)
#   snapshots - one hash per snapshot section, keyed by the section name
#
# Lines starting with '#' and 'description:' lines are collected into the 'description' of the
# section they belong to.
sub parse_vm_config {
    my ($filename, $raw, $strict) = @_;

    return if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
        pending => {},
        cloudinit => {},
    };

    my $handle_error = sub {
        my ($msg) = @_;

        if ($strict) {
            die $msg;
        } else {
            warn $msg;
        }
    };

    $filename =~ m|/qemu-server/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    # $conf always points to the hash of the section that is currently parsed
    my $conf = $res;
    my $descr;
    # store the description collected so far in the current section and start over
    my $finish_description = sub {
        if (defined($descr)) {
            $descr =~ s/\s+$//;
            $conf->{description} = $descr;
        }
        $descr = undef;
    };
    my $section = '';

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        if ($line =~ m/^\[PENDING\]\s*$/i) {
            $section = 'pending';
            $finish_description->();
            $conf = $res->{$section} = {};
            next;
        } elsif ($line =~ m/^\[special:cloudinit\]\s*$/i) {
            $section = 'cloudinit';
            $finish_description->();
            $conf = $res->{$section} = {};
            next;

        } elsif ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            $section = $1;
            $finish_description->();
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # anchored at the start of the line, so that other options whose value happens to
            # end in 'snapstate: ...' (e.g. the free-form 'args') are not swallowed here
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            my $key = $1;
            my $value = $2;
            $conf->{$key} = $value;
        } elsif ($line =~ m/^delete:\s*(.*\S)\s*$/) {
            my $value = $1;
            if ($section eq 'pending') {
                $conf->{delete} = $value; # we parse this later
            } else {
                $handle_error->("vm $vmid - property 'delete' is only allowed in [PENDING]\n");
            }
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(.+?)\s*$/) {
            my $key = $1;
            my $value = $2;
            if ($section eq 'cloudinit') {
                # ignore validation only used for informative purpose
                $conf->{$key} = $value;
                next;
            }
            eval { $value = check_type($key, $value); };
            if ($@) {
                $handle_error->("vm $vmid - unable to parse value of '$key' - $@");
            } else {
                # 'cdrom' is stored as 'ide2'
                $key = 'ide2' if $key eq 'cdrom';
                my $fmt = $confdesc->{$key}->{format};
                if ($fmt && $fmt =~ /^pve-qm-(?:ide|scsi|virtio|sata)$/) {
                    # replace the file name of a drive with the result of
                    # filename_to_volume_id()
                    my $v = parse_drive($key, $value);
                    if (my $volid = filename_to_volume_id($vmid, $v->{file}, $v->{media})) {
                        $v->{file} = $volid;
                        $value = print_drive($v);
                    } else {
                        $handle_error->("vm $vmid - unable to parse value of '$key'\n");
                        next;
                    }
                }

                $conf->{$key} = $value;
            }
        } else {
            $handle_error->("vm $vmid - unable to parse config: $line\n");
        }
    }

    $finish_description->();
    delete $res->{snapstate}; # just to be sure

    return $res;
}
2537
# Validate and normalize the config hash $conf (modified in place) and return its serialized
# form, i.e. the raw content of the config file. $filename is not used here.
#
# Normalization done on $conf:
# - 'cdrom' is converted to 'ide2' (it is an error to have both)
# - the legacy 'smp' option is converted to 'sockets'
# - all values (also those in the pending section and in snapshots) are replaced by the
#   result of check_type()
# - 'unusedN' entries are dropped if their volume is referenced by a drive of the current or
#   the pending config
#
# Dies if a value fails the type check, if 'delete' is used outside of the pending section or
# if a snapshot is named 'pending'.
sub write_vm_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    if ($conf->{cdrom}) {
        die "option ide2 conflicts with cdrom\n" if $conf->{ide2};
        $conf->{ide2} = $conf->{cdrom};
        delete $conf->{cdrom};
    }

    # we do not use 'smp' any longer
    if ($conf->{sockets}) {
        delete $conf->{smp};
    } elsif ($conf->{smp}) {
        $conf->{sockets} = $conf->{smp};
        delete $conf->{cores};
        delete $conf->{smp};
    }

    # volumes referenced by drives, collected by $cleanup_config below (snapshots excluded)
    my $used_volids = {};

    # type-check all options of one section in place
    my $cleanup_config = sub {
        my ($cref, $pending, $snapname) = @_;

        foreach my $key (keys %$cref) {
            next if $key eq 'digest' || $key eq 'description' || $key eq 'snapshots' ||
                $key eq 'snapstate' || $key eq 'pending' || $key eq 'cloudinit';
            my $value = $cref->{$key};
            if ($key eq 'delete') {
                die "property 'delete' is only allowed in [PENDING]\n"
                    if !$pending;
                # fixme: check syntax?
                next;
            }
            eval { $value = check_type($key, $value); };
            die "unable to parse value of '$key' - $@" if $@;

            $cref->{$key} = $value;

            if (!$snapname && is_valid_drivename($key)) {
                my $drive = parse_drive($key, $value);
                $used_volids->{$drive->{file}} = 1 if $drive && $drive->{file};
            }
        }
    };

    $cleanup_config->($conf);

    $cleanup_config->($conf->{pending}, 1);

    foreach my $snapname (keys %{$conf->{snapshots}}) {
        die "internal error: snapshot name '$snapname' is forbidden" if lc($snapname) eq 'pending';
        $cleanup_config->($conf->{snapshots}->{$snapname}, undef, $snapname);
    }

    # remove 'unusedX' settings if we re-add a volume
    foreach my $key (keys %$conf) {
        my $value = $conf->{$key};
        if ($key =~ m/^unused/ && $used_volids->{$value}) {
            delete $conf->{$key};
        }
    }

    # serialize one section: description as comment lines, followed by the sorted options
    my $generate_raw_config = sub {
        my ($section_conf, $pending) = @_;

        my $raw = '';

        # add description as comment to top of file
        if (defined(my $descr = $section_conf->{description})) {
            if ($descr) {
                foreach my $cl (split(/\n/, $descr)) {
                    $raw .= '#' . PVE::Tools::encode_text($cl) . "\n";
                }
            } else {
                # an empty description is only written out in the pending section
                $raw .= "#\n" if $pending;
            }
        }

        foreach my $key (sort keys %$section_conf) {
            next if $key =~ /^(digest|description|pending|cloudinit|snapshots)$/;
            $raw .= "$key: $section_conf->{$key}\n";
        }
        return $raw;
    };

    my $raw = $generate_raw_config->($conf);

    if (scalar(keys %{$conf->{pending}})){
        $raw .= "\n[PENDING]\n";
        $raw .= $generate_raw_config->($conf->{pending}, 1);
    }

    if (scalar(keys %{$conf->{cloudinit}}) && PVE::QemuConfig->has_cloudinit($conf)){
        $raw .= "\n[special:cloudinit]\n";
        $raw .= $generate_raw_config->($conf->{cloudinit});
    }

    foreach my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n";
        $raw .= $generate_raw_config->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
2644
# Return a hash reference mapping each option of $confdesc that declares a default to that
# default value.
sub load_defaults {
    # we use static defaults from our JSON schema configuration
    my %defaults =
        map { $_ => $confdesc->{$_}->{default} }
        grep { defined($confdesc->{$_}->{default}) }
        keys %$confdesc;

    return \%defaults;
}
2658
# Return a hash reference with an entry { exists => 1 } for each guest of type 'qemu' that the
# cluster wide VM list assigns to the local node, keyed by VMID.
sub config_list {
    my $vmlist = PVE::Cluster::get_vmlist();

    my $ids = $vmlist ? $vmlist->{ids} : undef;
    return {} if !$ids;

    my $local_node = nodename();

    my $res = {};
    for my $vmid (keys %$ids) {
        my $entry = $ids->{$vmid};
        my ($node, $type) = ($entry->{node}, $entry->{type});

        next if !($node && $node eq $local_node);
        next if !($type && $type eq 'qemu');

        $res->{$vmid}->{exists} = 1;
    }

    return $res;
}
2674
# test if VM uses local resources (to prevent migration)
#
# Returns a reference to the list of config keys that count as local resources. Dies if the
# list is not empty, unless $noerr is set.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    # 'hostusb' and 'hostpci' without index are the old syntax
    my @local = grep { $conf->{$_} } qw(hostusb hostpci ivshmem);

    for my $key (keys %$conf) {
        my $value = $conf->{$key};

        # USB entries whose value starts with the 'spice' item are not counted
        next if $key =~ m/^usb/ && $value =~ m/^spice(?![^,])/;
        # sockets are safe: they will be recreated on the target side post-migrate
        next if $key =~ m/^serial/ && $value eq 'socket';
        next if $key !~ m/^(usb|hostpci|serial|parallel)\d+$/;

        push @local, $key;
    }

    die "VM uses local resources\n" if @local && !$noerr;

    return \@local;
}
2697
# check if used storages are available on all nodes (used by migrate)
#
# For every volume of $conf that lives on a storage, this verifies via
# PVE::Storage::storage_check_enabled() that the storage is usable locally and on $node, and
# dies if the content type of the volume is not configured on the storage.
sub check_storage_availability {
    my ($storecfg, $conf, $node) = @_;

    my $check_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file} or return;

        # volumes that do not parse as volume ID are not checked
        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        # check if storage is available on both nodes
        my $scfg = PVE::Storage::storage_check_enabled($storecfg, $sid);
        PVE::Storage::storage_check_enabled($storecfg, $sid, $node);

        my ($vtype) = PVE::Storage::parse_volname($storecfg, $volid);

        die "$volid: content type '$vtype' is not available on storage '$sid'\n"
            if !$scfg->{content}->{$vtype};
    };

    PVE::QemuConfig->foreach_volume($conf, $check_volume);
}
2721
# list nodes where all VM images are available (used by has_feature API)
#
# Returns a hash reference whose keys are the names of those nodes. It starts out with all
# cluster nodes and is narrowed down by each volume of $conf that lives on a storage:
# - a disabled storage leaves no node at all
# - a storage restricted to certain nodes leaves only those
# - a storage that is not shared leaves only the local node
sub shared_nodes {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => 1 } @$nodelist };
    my $local_node = nodename();

    my $restrict_by_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file} or return;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        if ($scfg->{disable}) {
            %$nodehash = ();
        } elsif (my $avail = $scfg->{nodes}) {
            delete @$nodehash{ grep { !$avail->{$_} } keys %$nodehash };
        } elsif (!$scfg->{shared}) {
            delete @$nodehash{ grep { $_ ne $local_node } keys %$nodehash };
        }

        return;
    };

    PVE::QemuConfig->foreach_volume($conf, $restrict_by_volume);

    return $nodehash;
}
2755
# Determine for each cluster node which storages used by the volumes of $conf are not
# available there, i.e. are disabled or restricted to other nodes.
#
# Returns a hash reference keyed by node name. The entry of a node is a hash reference that
# has the key 'unavailable_storages' (sorted list of storage IDs) if there is at least one
# such storage, and is empty otherwise.
sub check_local_storage_availability {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => {} } @$nodelist };

    my $record_unavailable = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file} or return;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        my @affected_nodes;
        if ($scfg->{disable}) {
            @affected_nodes = keys %$nodehash;
        } elsif (my $avail = $scfg->{nodes}) {
            @affected_nodes = grep { !$avail->{$_} } keys %$nodehash;
        }

        # collected as hash first to get rid of duplicates
        $nodehash->{$_}->{unavailable_storages}->{$storeid} = 1 for @affected_nodes;

        return;
    };

    PVE::QemuConfig->foreach_volume($conf, $record_unavailable);

    for my $node_info (values %$nodehash) {
        my $unavail = $node_info->{unavailable_storages} or next;
        $node_info->{unavailable_storages} = [ sort keys %$unavail ];
    }

    return $nodehash;
}
2794
# Compat only, use assert_config_exists_on_node and vm_running_locally where possible
#
# Returns the result of PVE::QemuServer::Helpers::vm_running_locally() for $vmid. Unless
# $nocheck is set, PVE::QemuConfig::assert_config_exists_on_node() is called for $vmid and
# $node first.
sub check_running {
    my ($vmid, $nocheck, $node) = @_;

    # $nocheck is set when called during a migration, in which case the config
    # file might still or already reside on the *other* node
    # - because rename has already happened, and current node is source
    # - because rename hasn't happened yet, and current node is target
    # - because rename has happened, current node is target, but hasn't yet
    #   processed it yet
    unless ($nocheck) {
        PVE::QemuConfig::assert_config_exists_on_node($vmid, $node);
    }

    return PVE::QemuServer::Helpers::vm_running_locally($vmid);
}
2808
# Return the result of config_list(), with a 'pid' entry added for each listed VM that has a
# '<vmid>.pid' file in $PVE::QemuServer::Helpers::var_run_tmpdir and for which check_running()
# returns a PID. If that directory cannot be opened, the plain list is returned.
sub vzlist {
    my $vzlist = config_list();

    my $dir = IO::Dir->new($PVE::QemuServer::Helpers::var_run_tmpdir);
    return $vzlist if !$dir;

    while (defined(my $entry = $dir->read)) {
        next unless $entry =~ m/^(\d+)\.pid$/;
        my $vmid = $1;

        # ignore PID files of VMs that are not in the list
        next unless defined($vzlist->{$vmid});

        my $pid = check_running($vmid) or next;
        $vzlist->{$vmid}->{pid} = $pid;
    }

    return $vzlist;
}
2826
# JSON schema properties describing a VM status entry.
our $vmstatus_return_properties = do {
    # all properties except 'vmid' and 'status' are optional and follow the same pattern
    my $optional = sub {
        my ($type, $description, %extra) = @_;
        return {
            description => $description,
            type => $type,
            optional => 1,
            %extra,
        };
    };

    my $props = {
        vmid => get_standard_option('pve-vmid'),
        status => {
            description => "QEMU process status.",
            type => 'string',
            enum => ['stopped', 'running'],
        },
        maxmem => $optional->('integer', "Maximum memory in bytes.", renderer => 'bytes'),
        maxdisk => $optional->('integer', "Root disk size in bytes.", renderer => 'bytes'),
        name => $optional->('string', "VM name."),
        qmpstatus => $optional->('string', "QEMU QMP agent status."),
        pid => $optional->('integer', "PID of running qemu process."),
        uptime => $optional->('integer', "Uptime.", renderer => 'duration'),
        cpus => $optional->('number', "Maximum usable CPUs."),
        lock => $optional->('string', "The current config lock, if any."),
        tags => $optional->('string', "The current configured tags, if any"),
        'running-machine' =>
            $optional->('string', "The currently running machine type (if running)."),
        'running-qemu' =>
            $optional->('string', "The currently running QEMU version (if running)."),
    };

    $props;
};
2893
# CPU accounting data of the previous vmstatus() call, keyed by PID: { time, used, cpu }
my $last_proc_pid_stat;

# get VM status information
# This must be fast and should not block ($full == false)
# We only query KVM using QMP if $full == true (this can be slow)
#
# $opt_vmid - if set, only the status of this VM is returned
# $full     - if set, additionally query the running VMs via QMP
#
# Returns a hash reference keyed by VMID. Each entry is built from the VM config (name, maxmem,
# cpus, ...) and, for running VMs, from /proc (uptime, mem, cpu) and the network device
# counters (netin, netout). With $full set, running VMs additionally get per-NIC counters,
# the QMP status, block statistics, balloon info, the running machine type and QEMU version
# and the 'proxmox-support' info.
sub vmstatus {
    my ($opt_vmid, $full) = @_;

    my $res = {};

    my $storecfg = PVE::Storage::config();

    my $list = vzlist();
    my $defaults = load_defaults();

    my ($uptime) = PVE::ProcFSTools::read_proc_uptime(1);

    my $cpucount = $cpuinfo->{cpus} || 1;

    # static information from the config
    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);

        my $conf = PVE::QemuConfig->load_config($vmid);

        my $d = { vmid => int($vmid) };
        $d->{pid} = int($list->{$vmid}->{pid}) if $list->{$vmid}->{pid};

        # fixme: better status?
        $d->{status} = $list->{$vmid}->{pid} ? 'running' : 'stopped';

        my $size = PVE::QemuServer::Drive::bootdisk_size($storecfg, $conf);
        if (defined($size)) {
            $d->{disk} = 0; # no info available
            $d->{maxdisk} = $size;
        } else {
            $d->{disk} = 0;
            $d->{maxdisk} = 0;
        }

        # sockets * cores, capped by the host CPU count; an explicit 'vcpus' setting wins
        $d->{cpus} = ($conf->{sockets} || $defaults->{sockets})
            * ($conf->{cores} || $defaults->{cores});
        $d->{cpus} = $cpucount if $d->{cpus} > $cpucount;
        $d->{cpus} = $conf->{vcpus} if $conf->{vcpus};

        $d->{name} = $conf->{name} || "VM $vmid";
        $d->{maxmem} = $conf->{memory} ? $conf->{memory}*(1024*1024)
            : $defaults->{memory}*(1024*1024);

        if ($conf->{balloon}) {
            $d->{balloon_min} = $conf->{balloon}*(1024*1024);
            $d->{shares} = defined($conf->{shares}) ? $conf->{shares}
                : $defaults->{shares};
        }

        $d->{uptime} = 0;
        $d->{cpu} = 0;
        $d->{mem} = 0;

        $d->{netout} = 0;
        $d->{netin} = 0;

        $d->{diskread} = 0;
        $d->{diskwrite} = 0;

        $d->{template} = 1 if PVE::QemuConfig->is_template($conf);

        $d->{serial} = 1 if conf_has_serial($conf);
        $d->{lock} = $conf->{lock} if $conf->{lock};
        $d->{tags} = $conf->{tags} if defined($conf->{tags});

        $res->{$vmid} = $d;
    }

    # network traffic, summed up over all 'tap<vmid>i...' devices of a VM
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    foreach my $dev (keys %$netdev) {
        next if $dev !~ m/^tap([1-9]\d*)i/;
        my $vmid = $1;
        my $d = $res->{$vmid};
        next if !$d;

        # note the swap: what the device receives is accounted as 'netout' and vice versa
        # (presumably because the counters are taken from the host side of the device)
        $d->{netout} += $netdev->{$dev}->{receive};
        $d->{netin} += $netdev->{$dev}->{transmit};

        if ($full) {
            $d->{nics}->{$dev}->{netout} = int($netdev->{$dev}->{receive});
            $d->{nics}->{$dev}->{netin} = int($netdev->{$dev}->{transmit});
        }

    }

    my $ctime = gettimeofday;

    # uptime, memory and CPU usage of running VMs from /proc/<pid>/stat
    foreach my $vmid (keys %$list) {

        my $d = $res->{$vmid};
        my $pid = $d->{pid};
        next if !$pid;

        my $pstat = PVE::ProcFSTools::read_proc_pid_stat($pid);
        next if !$pstat; # not running

        my $used = $pstat->{utime} + $pstat->{stime};

        $d->{uptime} = int(($uptime - $pstat->{starttime})/$cpuinfo->{user_hz});

        if ($pstat->{vsize}) {
            $d->{mem} = int(($pstat->{rss}/$pstat->{vsize})*$d->{maxmem});
        }

        # the CPU usage is computed relative to the values remembered from an earlier call,
        # so the first call for a PID can only record them
        my $old = $last_proc_pid_stat->{$pid};
        if (!$old) {
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($ctime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => $d->{cpu},
            };
        } else {
            # interval too short, reuse the previously computed value
            $d->{cpu} = $old->{cpu};
        }
    }

    return $res if !$full;

    my $qmpclient = PVE::QMPClient->new();

    my $ballooncb = sub {
        my ($vmid, $resp) = @_;

        my $info = $resp->{'return'};
        return if !$info->{max_mem};

        my $d = $res->{$vmid};

        # use memory assigned to VM
        $d->{maxmem} = $info->{max_mem};
        $d->{balloon} = $info->{actual};

        if (defined($info->{total_mem}) && defined($info->{free_mem})) {
            $d->{mem} = $info->{total_mem} - $info->{free_mem};
            $d->{freemem} = $info->{free_mem};
        }

        $d->{ballooninfo} = $info;
    };

    my $blockstatscb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} || [];
        my $totalrdbytes = 0;
        my $totalwrbytes = 0;

        for my $blockstat (@$data) {
            $totalrdbytes = $totalrdbytes + $blockstat->{stats}->{rd_bytes};
            $totalwrbytes = $totalwrbytes + $blockstat->{stats}->{wr_bytes};

            # strip the 'drive-' prefix only (anchored, it must not match inside the name)
            $blockstat->{device} =~ s/^drive-//;
            $res->{$vmid}->{blockstat}->{$blockstat->{device}} = $blockstat->{stats};
        }
        $res->{$vmid}->{diskread} = $totalrdbytes;
        $res->{$vmid}->{diskwrite} = $totalwrbytes;
    };

    my $machinecb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} || [];

        $res->{$vmid}->{'running-machine'} =
            PVE::QemuServer::Machine::current_from_query_machines($data);
    };

    my $versioncb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} // {};
        my $version = 'unknown';

        if (my $v = $data->{qemu}) {
            $version = $v->{major} . "." . $v->{minor} . "." . $v->{micro};
        }

        $res->{$vmid}->{'running-qemu'} = $version;
    };

    my $statuscb = sub {
        my ($vmid, $resp) = @_;

        $qmpclient->queue_cmd($vmid, $blockstatscb, 'query-blockstats');
        $qmpclient->queue_cmd($vmid, $machinecb, 'query-machines');
        $qmpclient->queue_cmd($vmid, $versioncb, 'query-version');
        # this fails if the balloon driver is not loaded, so this must be
        # the last command (following commands are aborted if this fails).
        $qmpclient->queue_cmd($vmid, $ballooncb, 'query-balloon');

        my $status = $resp->{'return'}->{status};
        if (!defined($status)) {
            warn "unable to get VM status\n";
            return;
        }

        $res->{$vmid}->{qmpstatus} = $status;
    };

    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        next if !$res->{$vmid}->{pid}; # not running
        $qmpclient->queue_cmd($vmid, $statuscb, 'query-status');
    }

    $qmpclient->queue_execute(undef, 2);

    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        next if !$res->{$vmid}->{pid}; #not running

        # we can't use the $qmpclient since it might have already aborted on
        # 'query-balloon', but this might also fail for older versions...
        my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };
        $res->{$vmid}->{'proxmox-support'} = $qemu_support // {};
    }

    # fall back to the process status if there is no QMP status
    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        $res->{$vmid}->{qmpstatus} = $res->{$vmid}->{status} if !$res->{$vmid}->{qmpstatus};
    }

    return $res;
}
3134
# Check whether the VM config defines at least one serial port.
#
# $conf - VM configuration hash; the keys serial0 .. serial<N-1> are
#         inspected, with N being $MAX_SERIAL_PORTS.
#
# Returns 1 if any of those keys holds a true value, 0 otherwise.
sub conf_has_serial {
    my ($conf) = @_;

    for my $port (0 .. $MAX_SERIAL_PORTS - 1) {
        return 1 if $conf->{"serial$port"};
    }

    return 0;
}
3146
# Look up and parse the audio device entry "audio$id" of a VM config.
#
# $conf - VM configuration hash.
# $id   - index of the audio entry, defaults to 0.
#
# Returns nothing if the entry is not defined. Otherwise returns a hash
# reference with the keys:
#   dev        - the 'device' property of the parsed entry
#   dev_id     - "audiodev$id"
#   backend    - the 'driver' property, 'spice' if none is set
#   backend_id - "<backend>-backend<id>"
sub conf_has_audio {
    my ($conf, $id) = @_;

    $id //= 0;
    my $raw = $conf->{"audio$id"};
    return if !defined($raw);

    my $props = parse_property_string($audio_fmt, $raw);
    my $backend = $props->{driver} // 'spice';

    my $info = {
        dev => $props->{device},
        dev_id => "audiodev$id",
        backend => $backend,
        backend_id => "$backend-backend${id}",
    };

    return $info;
}
3164
# Build the QEMU command line arguments for one audio device.
#
# $audio           - hash reference as returned by conf_has_audio()
# $audiopciaddr    - PCI address suffix appended to the device string
# $machine_version - machine version; from 4.2 on the devices get an
#                    explicit ",audiodev=<backend_id>" reference
#
# Returns an array reference of alternating option names and values: the
# '-device' entries followed by the matching '-audiodev' entry. Dies for
# device models other than AC97 and *intel-hda.
sub audio_devs {
    my ($audio, $audiopciaddr, $machine_version) = @_;

    my $dev_id = $audio->{dev_id};
    my $model = $audio->{dev};

    my $backend_ref = min_version($machine_version, 4, 2)
        ? ",audiodev=$audio->{backend_id}"
        : "";

    my @args;
    if ($model eq 'AC97') {
        push @args, '-device', "AC97,id=${dev_id}${audiopciaddr}${backend_ref}";
    } elsif ($model =~ /intel\-hda$/) {
        # the HDA controller itself plus two codecs attached to its bus
        push @args,
            '-device', "$model,id=${dev_id}${audiopciaddr}",
            '-device', "hda-micro,id=${dev_id}-codec0,bus=${dev_id}.0,cad=0${backend_ref}",
            '-device', "hda-duplex,id=${dev_id}-codec1,bus=${dev_id}.0,cad=1${backend_ref}";
    } else {
        die "unkown audio device '$model', implement me!";
    }

    push @args, '-audiodev', "$audio->{backend},id=$audio->{backend_id}";

    return \@args;
}
3190
# Return the runtime file locations used for the swtpm instance of a VM.
#
# $vmid - ID of the VM.
#
# Returns a hash reference with the keys 'socket' and 'pid', both located
# below /var/run/qemu-server.
sub get_tpm_paths {
    my ($vmid) = @_;

    my $prefix = "/var/run/qemu-server/$vmid.swtpm";

    my $paths = {
        socket => $prefix,
        pid => "$prefix.pid",
    };

    return $paths;
}
3198
# Append the QEMU arguments for an emulated TPM to a device list.
#
# $vmid    - ID of the VM, used to look up the swtpm socket path
# $devices - array reference that the arguments are pushed onto
# $conf    - VM configuration hash; nothing is added unless 'tpmstate0'
#            is set
sub add_tpm_device {
    my ($vmid, $devices, $conf) = @_;

    return if !$conf->{tpmstate0};

    my $socket = get_tpm_paths($vmid)->{socket};

    # chardev talks to the swtpm socket, tpmdev wraps it, tpm-tis exposes it
    push @$devices,
        "-chardev", "socket,id=tpmchar,path=$socket",
        "-tpmdev", "emulator,id=tpmdev,chardev=tpmchar",
        "-device", "tpm-tis,tpmdev=tpmdev";
}
3210
# Set up (if needed) and start the swtpm daemon for a VM's TPM state.
#
# $storecfg  - storage configuration, used to map a storage volume
# $vmid      - ID of the VM, determines socket and pid file locations
# $tpmdrive  - value of the 'tpmstate0' config entry; nothing is done and
#              nothing is returned if this is false
# $migration - if true, swtpm_setup is skipped because the state will be
#              received from the remote side
#
# Returns the PID read from the swtpm pid file (taken from a regex capture,
# hence untainted) so the daemon can be killed on error, or undef if the pid
# file contains no number. Dies if the pid file does not show up in time.
sub start_swtpm {
    my ($storecfg, $vmid, $tpmdrive, $migration) = @_;

    return if !$tpmdrive;

    my $state;
    my $tpm = parse_drive("tpmstate0", $tpmdrive);
    my ($storeid, $volname) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
    if ($storeid) {
        $state = PVE::Storage::map_volume($storecfg, $tpm->{file});
    } else {
        $state = $tpm->{file};
    }

    my $paths = get_tpm_paths($vmid);

    # tolerate an unset version instead of warning about an undef comparison
    my $is_tpm2 = ($tpm->{version} // '') eq 'v2.0';

    # during migration, we will get state from remote
    if (!$migration) {
        # run swtpm_setup to create a new TPM state if it doesn't exist yet
        my $setup_cmd = [
            "swtpm_setup",
            "--tpmstate",
            "file://$state",
            "--createek",
            "--create-ek-cert",
            "--create-platform-cert",
            "--lock-nvram",
            "--config",
            "/etc/swtpm_setup.conf", # do not use XDG configs
            "--runas",
            "0", # force creation as root, error if not possible
            "--not-overwrite", # ignore existing state, do not modify
        ];

        # TPM 2.0 supports ECC crypto, use if possible
        push @$setup_cmd, "--tpm2", "--ecc" if $is_tpm2;

        run_command($setup_cmd, outfunc => sub {
            # use the line handed to the callback, not whatever $1 holds
            my ($line) = @_;
            print "swtpm_setup: $line\n";
        });
    }

    my $emulator_cmd = [
        "swtpm",
        "socket",
        "--tpmstate",
        "backend-uri=file://$state,mode=0600",
        "--ctrl",
        "type=unixio,path=$paths->{socket},mode=0600",
        "--pid",
        "file=$paths->{pid}",
        "--terminate", # terminate on QEMU disconnect
        "--daemon",
    ];
    push @$emulator_cmd, "--tpm2" if $is_tpm2;
    run_command($emulator_cmd, outfunc => sub {
        my ($line) = @_;
        print $line;
    });

    my $tries = 100; # swtpm may take a bit to start before daemonizing, wait up to 5s for pid
    while (! -e $paths->{pid}) {
        die "failed to start swtpm: pid file '$paths->{pid}' wasn't created.\n" if --$tries == 0;
        usleep(50_000);
    }

    # Capture into a lexical: after a failed match $1 would still hold the
    # result of an earlier, unrelated match and be returned as bogus PID.
    my $first_line = file_read_firstline($paths->{pid}) // '';
    my ($pid) = $first_line =~ m/(\d+)/;

    return $pid;
}
3280
# Inspect a 'vga' config value for a qxl display type.
#
# $vga - raw 'vga' property string, parsed with parse_vga().
#
# Returns 0 if the parsed type is unset or not one of qxl, qxl2, qxl3,
# qxl4. Otherwise returns the digit following 'qxl', or 1 for plain 'qxl'.
sub vga_conf_has_spice {
    my ($vga) = @_;

    my $type = parse_vga($vga)->{type};

    if ($type && $type =~ m/^qxl([234])?$/) {
        return $1 || 1;
    }

    return 0;
}
3290
# Tell whether $arch is the architecture reported by get_host_arch().
sub is_native($) {
    my ($arch) = @_;

    my $host_arch = get_host_arch();

    return $host_arch eq $arch;
}
3295
# Return the architecture of a VM: the 'arch' entry of its config if
# defined, otherwise the result of get_host_arch().
sub get_vm_arch {
    my ($conf) = @_;

    my $arch = $conf->{arch};

    return defined($arch) ? $arch : get_host_arch();
}
3300
# Generic machine type used per architecture when none is configured.
my $default_machines = {
    'x86_64' => 'pc',
    'aarch64' => 'virt',
};
3305
# Extract the "major.minor" part of a QEMU version string.
#
# $kvmversion - version string to parse; if undefined, the result of
#               kvm_user_version() is used instead.
#
# Returns the leading "major.minor" of the string, or undef if the string
# does not start with such a version.
sub get_installed_machine_version {
    my ($kvmversion) = @_;

    $kvmversion = kvm_user_version() if !defined($kvmversion);

    # Capture into a lexical: after a failed match $1 would still hold the
    # capture of an earlier, unrelated match and be returned as version.
    my ($version) = $kvmversion =~ m/^(\d+\.\d+)/;

    return $version;
}
3312
# Turn a generic machine type into a version-pinned one.
#
# $machine      - generic machine type ('pc', 'q35', 'virt') or a false
#                 value, which is treated like 'pc'
# $base_version - version to pin to; if it is undefined or
#                 can_run_pve_machine_version() rejects it for $kvmversion,
#                 the installed version is used instead
# $kvmversion   - QEMU version passed on to the version helpers
#
# Returns the pinned machine type, e.g. "pc-q35-<version>". Any other
# machine type is returned unchanged after printing a warning.
sub windows_get_pinned_machine_version {
    my ($machine, $base_version, $kvmversion) = @_;

    my $base_usable = defined($base_version)
        && PVE::QemuServer::Machine::can_run_pve_machine_version($base_version, $kvmversion);

    my $pin_version = $base_usable
        ? $base_version
        : get_installed_machine_version($kvmversion);

    my $versioned_prefix = {
        pc => 'pc-i440fx',
        q35 => 'pc-q35',
        virt => 'virt',
    };

    my $prefix = $versioned_prefix->{ $machine || 'pc' };
    return "$prefix-$pin_version" if defined($prefix);

    warn "unknown machine type '$machine', not touching that!\n";

    return $machine;
}
3334
# Determine the machine type string to use for a VM.
#
# $conf            - VM configuration hash ('machine' and 'ostype' are read)
# $forcemachine    - overrides the configured machine type if true
# $arch            - architecture used to pick a default machine type,
#                    defaults to 'x86_64'
# $add_pve_version - if true, make sure the result carries a "+pveN" suffix
# $kvmversion      - QEMU version; looked up via kvm_user_version() if
#                    undefined and needed
#
# Returns the machine type string.
sub get_vm_machine {
    my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_;

    my $machine = $forcemachine || $conf->{machine};

    my $is_generic = !$machine || $machine =~ m/^(?:pc|q35|virt)$/;
    if ($is_generic) {
        $kvmversion //= kvm_user_version();

        # we must pin Windows VMs without a specific version to 5.1, as 5.2 fixed a bug in ACPI
        # layout which confuses windows quite a bit and may result in various regressions..
        # see: https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg08484.html
        $machine = windows_get_pinned_machine_version($machine, '5.1', $kvmversion)
            if windows_version($conf->{ostype});

        $arch //= 'x86_64';
        $machine ||= $default_machines->{$arch};

        if ($add_pve_version) {
            $machine .= "+pve" . PVE::QemuServer::Machine::get_pve_version($kvmversion);
        }
    }

    return $machine if !$add_pve_version;
    return $machine if $machine =~ m/\+pve\d+?(?:\.pxe)?$/;

    # for version-pinned machines that do not include a pve-version (e.g.
    # pc-q35-4.1), we assume 0 to keep them stable in case we bump
    if (my ($without_pxe) = $machine =~ m/^(.*?)\.pxe$/) {
        # the pve version goes in front of a trailing '.pxe'
        $machine = "$without_pxe+pve0.pxe";
    } else {
        $machine .= '+pve0';
    }

    return $machine;
}
3369
# Select the OVMF firmware image entry for an architecture and EFI disk.
#
# $arch    - architecture, used as key into $OVMF
# $efidisk - parsed efidisk drive (may be undef); 'efitype' and
#            'pre-enrolled-keys' are read
# $smm     - if true, the SMM capable variant of the '4m' images is used
#
# Image type selection:
#   efitype '4m'  -> '4m' or '4m-no-smm', with '-ms' appended if
#                    'pre-enrolled-keys' is set
#   anything else -> 'default'
#
# Returns the elements of the selected entry as a list. Dies if nothing is
# known for the architecture or for the selected image type.
sub get_ovmf_files($$$) {
    my ($arch, $efidisk, $smm) = @_;

    my $types = $OVMF->{$arch}
        or die "no OVMF images known for architecture '$arch'\n";

    my $type = 'default';
    if (defined($efidisk->{efitype}) && $efidisk->{efitype} eq '4m') {
        $type = $smm ? "4m" : "4m-no-smm";
        $type .= '-ms' if $efidisk->{'pre-enrolled-keys'};
    }

    # fail with a clear message instead of dereferencing undef below
    my $files = $types->{$type}
        or die "no OVMF images of type '$type' known for architecture '$arch'\n";

    return $files->@*;
}
3384
# QEMU system emulator binary per guest architecture.
my $Arch2Qemu = {
    'aarch64' => '/usr/bin/qemu-system-aarch64',
    'x86_64' => '/usr/bin/qemu-system-x86_64',
};

# Return the binary used to run a VM of the given architecture:
# '/usr/bin/kvm' if is_native() is true for it, otherwise the matching
# entry of $Arch2Qemu. Dies for architectures without such an entry.
sub get_command_for_arch($) {
    my ($arch) = @_;

    if (is_native($arch)) {
        return '/usr/bin/kvm';
    }

    my $emulator = $Arch2Qemu->{$arch};
    die "don't know how to emulate architecture '$arch'\n" if !$emulator;

    return $emulator;
}
3397
# To get flags usable in a QEMU command line (-cpu element) from
# query_supported_cpu_flags and query_understood_cpu_flags, array_intersect
# the result of query_supported_ with query_understood_ first. This is
# necessary because:
#
# a) query_understood_ returns flags the host cannot use and
# b) query_supported_ (rather the QMP call) doesn't actually return CPU
#    flags, but CPU settings - with most of them being flags. Those settings
#    (and some flags, curiously) cannot be specified as a "-cpu" argument.
#
# query_supported_ needs to start up to 2 temporary VMs and is therefore
# rather expensive. If you need the value returned from this, you can get it
# much cheaper from pmxcfs using PVE::Cluster::get_node_kv('cpuflags-$accel')
# with $accel being 'kvm' or 'tcg'.
#
# pvestatd calls this function on startup and whenever the QEMU/KVM version
# changes, automatically populating pmxcfs.
#
# Returns: { kvm => [ flagX, flagY, ... ], tcg => [ flag1, flag2, ... ] }
# since kvm and tcg machines support different flags. The 'kvm' entry is only
# present if kvm_version() is defined; an entry is undef if its query failed.
#
sub query_supported_cpu_flags {
    my ($arch) = @_;

    $arch //= get_host_arch();
    my $default_machine = $default_machines->{$arch};

    # FIXME: Once this is merged, the code below should work for ARM as well:
    # https://lists.nongnu.org/archive/html/qemu-devel/2019-06/msg04947.html
    if ($arch eq "aarch64") {
        die "QEMU/KVM cannot detect CPU flags on ARM (aarch64)\n";
    }

    my $kvm_supported = defined(kvm_version());
    my $qemu_cmd = get_command_for_arch($arch);
    my $fakevmid = -1;
    my $pidfile = PVE::QemuServer::Helpers::pidfile_name($fakevmid);

    # Start a temporary (frozen) VM with vmid -1 to allow sending a QMP command
    my $query_supported_run_qemu = sub {
        my ($kvm) = @_;

        my @cmd = (
            $qemu_cmd,
            '-machine', $default_machine,
            '-display', 'none',
            '-chardev', "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server=on,wait=off",
            '-mon', 'chardev=qmp,mode=control',
            '-pidfile', $pidfile,
            '-S', '-daemonize',
        );
        push @cmd, '-accel', 'tcg' if !$kvm;

        my $rc = run_command(\@cmd, noerr => 1, quiet => 0);
        die "QEMU flag querying VM exited with code $rc" if $rc;

        my %seen;
        eval {
            my $expansion = mon_cmd(
                $fakevmid,
                'query-cpu-model-expansion',
                type => 'full',
                model => { name => 'host' }
            );

            my $props = $expansion->{model}->{props};
            for my $name (keys %$props) {
                next if $props->{$name} ne '1';
                # QEMU returns some flags multiple times, with '_', '.' or '-'
                # (e.g. lahf_lm and lahf-lm; sse4.2, sse4-2 and sse4_2; ...).
                # We only keep those with underscores, to match /proc/cpuinfo
                (my $normalized = $name) =~ s/\.|-/_/g;
                $seen{$normalized} = 1;
            }
        };
        my $err = $@;

        # force stop with 10 sec timeout and 'nocheck', always stop, even if QMP failed
        vm_stop(undef, $fakevmid, 1, 1, 10, 0, 1);

        die $err if $err;

        return [ sort keys %seen ];
    };

    my $flags = {};

    # We need to query QEMU twice, since KVM and TCG have different supported flags
    PVE::QemuConfig->lock_config($fakevmid, sub {
        for my $accel (qw(tcg kvm)) {
            next if $accel eq 'kvm' && !$kvm_supported;

            $flags->{$accel} = eval { $query_supported_run_qemu->($accel eq 'kvm') };
            warn "warning: failed querying supported $accel flags: $@\n" if $@;
        }
    });

    return $flags;
}
3499
# Understood CPU flags are written to a file at 'pve-qemu' compile time
my $understood_cpu_flag_dir = "/usr/share/kvm";

# Read the list of CPU flags QEMU understands for the host architecture.
#
# Returns an array reference with the whitespace separated entries of
# "$understood_cpu_flag_dir/recognized-CPUID-flags-<arch>". Dies if that
# file does not exist.
sub query_understood_cpu_flags {
    my $arch = get_host_arch();
    my $filepath = "$understood_cpu_flag_dir/recognized-CPUID-flags-$arch";

    if (! -e $filepath) {
        die "Cannot query understood QEMU CPU flags for architecture: $arch (file not found)\n";
    }

    my $content = file_get_contents($filepath);

    # trim first, so that leading whitespace cannot produce an empty entry
    $content =~ s/^\s+|\s+$//g;

    return [ split(/\s+/, $content) ];
}
3515
# Since commit 277d33454f77ec1d1e0bc04e37621e4dd2424b67 in pve-qemu, smm is
# not off by default anymore. But smm=off seems to be required when using
# SeaBIOS and serial display.
#
# True if the config uses SeaBIOS (explicitly, or because no bios is set)
# and the VGA type is 'serialN' or 'none'.
my sub should_disable_smm {
    my ($conf, $vga) = @_;

    my $uses_seabios = !defined($conf->{bios}) || $conf->{bios} eq 'seabios';

    return $uses_seabios && $vga->{type} && $vga->{type} =~ m/^(serial\d+|none)$/;
}
3524
# Build the two '-drive' option values needed to boot a VM with OVMF.
#
# $conf          - VM configuration hash ('efidisk0' is read)
# $storecfg      - storage configuration, used to resolve the efidisk volume
# $vmid          - ID of the VM, used for the temporary efivars file name
# $arch          - architecture, passed to get_ovmf_files()
# $q35           - passed to get_ovmf_files() as its $smm argument
# $version_guard - code reference called as ->(4, 1, 2) to decide whether a
#                  'size=' option is added
#
# Returns the list (code drive string, vars drive string). Dies if the
# firmware code image is missing, or if an efidisk that is not a storage
# volume has no format set.
my sub print_ovmf_drive_commandlines {
    my ($conf, $storecfg, $vmid, $arch, $q35, $version_guard) = @_;

    my $efidisk = $conf->{efidisk0} ? parse_drive('efidisk0', $conf->{efidisk0}) : undef;

    my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch, $efidisk, $q35);
    die "uefi base image '$ovmf_code' not found\n" if ! -f $ovmf_code;

    my $code_drive_str = "if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code";
    my $var_drive_str = "if=pflash,unit=1,id=drive-efidisk0";

    if (!$efidisk) {
        # no persistent efivars: run from a copy of the vars template
        log_warn("no efidisk configured! Using temporary efivars disk.");
        my $tmp_path = "/tmp/$vmid-ovmf.fd";
        PVE::Tools::file_copy($ovmf_vars, $tmp_path, -s $ovmf_vars);
        $var_drive_str .= ",format=raw,file=$tmp_path";
        $var_drive_str .= ",size=" . (-s $ovmf_vars) if $version_guard->(4, 1, 2);

        return ($code_drive_str, $var_drive_str);
    }

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($efidisk->{file}, 1);
    my $path = $efidisk->{file};
    my $format = $efidisk->{format};

    if ($storeid) {
        $path = PVE::Storage::path($storecfg, $efidisk->{file});
        if (!defined($format)) {
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $format = qemu_img_format($scfg, $volname);
        }
    } elsif (!defined($format)) {
        die "efidisk format must be specified\n";
    }

    # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
    if ($path =~ m/^rbd:/) {
        $var_drive_str .= ',cache=writeback';
        $path .= ':rbd_cache_policy=writeback'; # avoid write-around, we *need* to cache writes too
    }

    $var_drive_str .= ",format=$format,file=$path";

    if ($format eq 'raw' && $version_guard->(4, 1, 2)) {
        $var_drive_str .= ",size=" . (-s $ovmf_vars);
    }
    if (drive_is_read_only($conf, $efidisk)) {
        $var_drive_str .= ',readonly=on';
    }

    return ($code_drive_str, $var_drive_str);
}
3565
3566 sub config_to_command {
3567 my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu,
3568 $pbs_backing) = @_;
3569
3570 my ($globalFlags, $machineFlags, $rtcFlags) = ([], [], []);
3571 my $devices = [];
3572 my $bridges = {};
3573 my $ostype = $conf->{ostype};
3574 my $winversion = windows_version($ostype);
3575 my $kvm = $conf->{kvm};
3576 my $nodename = nodename();
3577
3578 my $arch = get_vm_arch($conf);
3579 my $kvm_binary = get_command_for_arch($arch);
3580 my $kvmver = kvm_user_version($kvm_binary);
3581
3582 if (!$kvmver || $kvmver !~ m/^(\d+)\.(\d+)/ || $1 < 3) {
3583 $kvmver //= "undefined";
3584 die "Detected old QEMU binary ('$kvmver', at least 3.0 is required)\n";
3585 }
3586
3587 my $add_pve_version = min_version($kvmver, 4, 1);
3588
3589 my $machine_type = get_vm_machine($conf, $forcemachine, $arch, $add_pve_version);
3590 my $machine_version = extract_version($machine_type, $kvmver);
3591 $kvm //= 1 if is_native($arch);
3592
3593 $machine_version =~ m/(\d+)\.(\d+)/;
3594 my ($machine_major, $machine_minor) = ($1, $2);
3595
3596 if ($kvmver =~ m/^\d+\.\d+\.(\d+)/ && $1 >= 90) {
3597 warn "warning: Installed QEMU version ($kvmver) is a release candidate, ignoring version checks\n";
3598 } elsif (!min_version($kvmver, $machine_major, $machine_minor)) {
3599 die "Installed QEMU version '$kvmver' is too old to run machine type '$machine_type',"
3600 ." please upgrade node '$nodename'\n"
3601 } elsif (!PVE::QemuServer::Machine::can_run_pve_machine_version($machine_version, $kvmver)) {
3602 my $max_pve_version = PVE::QemuServer::Machine::get_pve_version($machine_version);
3603 die "Installed qemu-server (max feature level for $machine_major.$machine_minor is"
3604 ." pve$max_pve_version) is too old to run machine type '$machine_type', please upgrade"
3605 ." node '$nodename'\n";
3606 }
3607
3608 # if a specific +pve version is required for a feature, use $version_guard
3609 # instead of min_version to allow machines to be run with the minimum
3610 # required version
3611 my $required_pve_version = 0;
3612 my $version_guard = sub {
3613 my ($major, $minor, $pve) = @_;
3614 return 0 if !min_version($machine_version, $major, $minor, $pve);
3615 my $max_pve = PVE::QemuServer::Machine::get_pve_version("$major.$minor");
3616 return 1 if min_version($machine_version, $major, $minor, $max_pve+1);
3617 $required_pve_version = $pve if $pve && $pve > $required_pve_version;
3618 return 1;
3619 };
3620
3621 if ($kvm && !defined kvm_version()) {
3622 die "KVM virtualisation configured, but not available. Either disable in VM configuration"
3623 ." or enable in BIOS.\n";
3624 }
3625
3626 my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
3627 my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
3628 my $use_old_bios_files = undef;
3629 ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);
3630
3631 my $cmd = [];
3632 if ($conf->{affinity}) {
3633 push @$cmd, '/usr/bin/taskset', '--cpu-list', '--all-tasks', $conf->{affinity};
3634 }
3635
3636 push @$cmd, $kvm_binary;
3637
3638 push @$cmd, '-id', $vmid;
3639
3640 my $vmname = $conf->{name} || "vm$vmid";
3641
3642 push @$cmd, '-name', "$vmname,debug-threads=on";
3643
3644 push @$cmd, '-no-shutdown';
3645
3646 my $use_virtio = 0;
3647
3648 my $qmpsocket = PVE::QemuServer::Helpers::qmp_socket($vmid);
3649 push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server=on,wait=off";
3650 push @$cmd, '-mon', "chardev=qmp,mode=control";
3651
3652 if (min_version($machine_version, 2, 12)) {
3653 push @$cmd, '-chardev', "socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5";
3654 push @$cmd, '-mon', "chardev=qmp-event,mode=control";
3655 }
3656
3657 push @$cmd, '-pidfile' , PVE::QemuServer::Helpers::pidfile_name($vmid);
3658
3659 push @$cmd, '-daemonize';
3660
3661 if ($conf->{smbios1}) {
3662 my $smbios_conf = parse_smbios1($conf->{smbios1});
3663 if ($smbios_conf->{base64}) {
3664 # Do not pass base64 flag to qemu
3665 delete $smbios_conf->{base64};
3666 my $smbios_string = "";
3667 foreach my $key (keys %$smbios_conf) {
3668 my $value;
3669 if ($key eq "uuid") {
3670 $value = $smbios_conf->{uuid}
3671 } else {
3672 $value = decode_base64($smbios_conf->{$key});
3673 }
3674 # qemu accepts any binary data, only commas need escaping by double comma
3675 $value =~ s/,/,,/g;
3676 $smbios_string .= "," . $key . "=" . $value if $value;
3677 }
3678 push @$cmd, '-smbios', "type=1" . $smbios_string;
3679 } else {
3680 push @$cmd, '-smbios', "type=1,$conf->{smbios1}";
3681 }
3682 }
3683
3684 if ($conf->{bios} && $conf->{bios} eq 'ovmf') {
3685 my ($code_drive_str, $var_drive_str) =
3686 print_ovmf_drive_commandlines($conf, $storecfg, $vmid, $arch, $q35, $version_guard);
3687 push $cmd->@*, '-drive', $code_drive_str;
3688 push $cmd->@*, '-drive', $var_drive_str;
3689 }
3690
3691 if ($q35) { # tell QEMU to load q35 config early
3692 # we use different pcie-port hardware for qemu >= 4.0 for passthrough
3693 if (min_version($machine_version, 4, 0)) {
3694 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35-4.0.cfg';
3695 } else {
3696 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35.cfg';
3697 }
3698 }
3699
3700 if (defined(my $fixups = qemu_created_version_fixups($conf, $forcemachine, $kvmver))) {
3701 push @$cmd, $fixups->@*;
3702 }
3703
3704 if ($conf->{vmgenid}) {
3705 push @$devices, '-device', 'vmgenid,guid='.$conf->{vmgenid};
3706 }
3707
3708 # add usb controllers
3709 my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers(
3710 $conf, $bridges, $arch, $machine_type, $usbdesc->{format}, $MAX_USB_DEVICES, $machine_version);
3711 push @$devices, @usbcontrollers if @usbcontrollers;
3712 my $vga = parse_vga($conf->{vga});
3713
3714 my $qxlnum = vga_conf_has_spice($conf->{vga});
3715 $vga->{type} = 'qxl' if $qxlnum;
3716
3717 if (!$vga->{type}) {
3718 if ($arch eq 'aarch64') {
3719 $vga->{type} = 'virtio';
3720 } elsif (min_version($machine_version, 2, 9)) {
3721 $vga->{type} = (!$winversion || $winversion >= 6) ? 'std' : 'cirrus';
3722 } else {
3723 $vga->{type} = ($winversion >= 6) ? 'std' : 'cirrus';
3724 }
3725 }
3726
3727 # enable absolute mouse coordinates (needed by vnc)
3728 my $tablet = $conf->{tablet};
3729 if (!defined($tablet)) {
3730 $tablet = $defaults->{tablet};
3731 $tablet = 0 if $qxlnum; # disable for spice because it is not needed
3732 $tablet = 0 if $vga->{type} =~ m/^serial\d+$/; # disable if we use serial terminal (no vga card)
3733 }
3734
3735 if ($tablet) {
3736 push @$devices, '-device', print_tabletdevice_full($conf, $arch) if $tablet;
3737 my $kbd = print_keyboarddevice_full($conf, $arch);
3738 push @$devices, '-device', $kbd if defined($kbd);
3739 }
3740
3741 my $bootorder = device_bootorder($conf);
3742
3743 # host pci device passthrough
3744 my ($kvm_off, $gpu_passthrough, $legacy_igd) = PVE::QemuServer::PCI::print_hostpci_devices(
3745 $vmid, $conf, $devices, $vga, $winversion, $q35, $bridges, $arch, $machine_type, $bootorder);
3746
3747 # usb devices
3748 my $usb_dev_features = {};
3749 $usb_dev_features->{spice_usb3} = 1 if min_version($machine_version, 4, 0);
3750
3751 my @usbdevices = PVE::QemuServer::USB::get_usb_devices(
3752 $conf, $usbdesc->{format}, $MAX_USB_DEVICES, $usb_dev_features, $bootorder, $machine_version);
3753 push @$devices, @usbdevices if @usbdevices;
3754
3755 # serial devices
3756 for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
3757 my $path = $conf->{"serial$i"} or next;
3758 if ($path eq 'socket') {
3759 my $socket = "/var/run/qemu-server/${vmid}.serial$i";
3760 push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server=on,wait=off";
3761 # On aarch64, serial0 is the UART device. QEMU only allows
3762 # connecting UART devices via the '-serial' command line, as
3763 # the device has a fixed slot on the hardware...
3764 if ($arch eq 'aarch64' && $i == 0) {
3765 push @$devices, '-serial', "chardev:serial$i";
3766 } else {
3767 push @$devices, '-device', "isa-serial,chardev=serial$i";
3768 }
3769 } else {
3770 die "no such serial device\n" if ! -c $path;
3771 push @$devices, '-chardev', "tty,id=serial$i,path=$path";
3772 push @$devices, '-device', "isa-serial,chardev=serial$i";
3773 }
3774 }
3775
3776 # parallel devices
3777 for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
3778 if (my $path = $conf->{"parallel$i"}) {
3779 die "no such parallel device\n" if ! -c $path;
3780 my $devtype = $path =~ m!^/dev/usb/lp! ? 'tty' : 'parport';
3781 push @$devices, '-chardev', "$devtype,id=parallel$i,path=$path";
3782 push @$devices, '-device', "isa-parallel,chardev=parallel$i";
3783 }
3784 }
3785
3786 if (min_version($machine_version, 4, 0) && (my $audio = conf_has_audio($conf))) {
3787 my $audiopciaddr = print_pci_addr("audio0", $bridges, $arch, $machine_type);
3788 my $audio_devs = audio_devs($audio, $audiopciaddr, $machine_version);
3789 push @$devices, @$audio_devs;
3790 }
3791
3792 add_tpm_device($vmid, $devices, $conf);
3793
3794 my $sockets = 1;
3795 $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused
3796 $sockets = $conf->{sockets} if $conf->{sockets};
3797
3798 my $cores = $conf->{cores} || 1;
3799
3800 my $maxcpus = $sockets * $cores;
3801
3802 my $vcpus = $conf->{vcpus} ? $conf->{vcpus} : $maxcpus;
3803
3804 my $allowed_vcpus = $cpuinfo->{cpus};
3805
3806 die "MAX $allowed_vcpus vcpus allowed per VM on this node\n" if ($allowed_vcpus < $maxcpus);
3807
3808 if ($hotplug_features->{cpu} && min_version($machine_version, 2, 7)) {
3809 push @$cmd, '-smp', "1,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3810 for (my $i = 2; $i <= $vcpus; $i++) {
3811 my $cpustr = print_cpu_device($conf,$i);
3812 push @$cmd, '-device', $cpustr;
3813 }
3814
3815 } else {
3816
3817 push @$cmd, '-smp', "$vcpus,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3818 }
3819 push @$cmd, '-nodefaults';
3820
3821 push @$cmd, '-boot', "menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg";
3822
3823 push @$cmd, '-no-acpi' if defined($conf->{acpi}) && $conf->{acpi} == 0;
3824
3825 push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;
3826
3827 if ($vga->{type} && $vga->{type} !~ m/^serial\d+$/ && $vga->{type} ne 'none'){
3828 push @$devices, '-device', print_vga_device(
3829 $conf, $vga, $arch, $machine_version, $machine_type, undef, $qxlnum, $bridges);
3830
3831 push @$cmd, '-display', 'egl-headless,gl=core' if $vga->{type} eq 'virtio-gl'; # VIRGL
3832
3833 my $socket = PVE::QemuServer::Helpers::vnc_socket($vmid);
3834 push @$cmd, '-vnc', "unix:$socket,password=on";
3835 } else {
3836 push @$cmd, '-vga', 'none' if $vga->{type} eq 'none';
3837 push @$cmd, '-nographic';
3838 }
3839
3840 # time drift fix
3841 my $tdf = defined($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};
3842 my $useLocaltime = $conf->{localtime};
3843
3844 if ($winversion >= 5) { # windows
3845 $useLocaltime = 1 if !defined($conf->{localtime});
3846
3847 # use time drift fix when acpi is enabled
3848 if (!(defined($conf->{acpi}) && $conf->{acpi} == 0)) {
3849 $tdf = 1 if !defined($conf->{tdf});
3850 }
3851 }
3852
3853 if ($winversion >= 6) {
3854 push @$globalFlags, 'kvm-pit.lost_tick_policy=discard';
3855 push @$cmd, '-no-hpet';
3856 }
3857
3858 push @$rtcFlags, 'driftfix=slew' if $tdf;
3859
3860 if ($conf->{startdate} && $conf->{startdate} ne 'now') {
3861 push @$rtcFlags, "base=$conf->{startdate}";
3862 } elsif ($useLocaltime) {
3863 push @$rtcFlags, 'base=localtime';
3864 }
3865
3866 if ($forcecpu) {
3867 push @$cmd, '-cpu', $forcecpu;
3868 } else {
3869 push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
3870 }
3871
3872 PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd);
3873
3874 push @$cmd, '-S' if $conf->{freeze};
3875
3876 push @$cmd, '-k', $conf->{keyboard} if defined($conf->{keyboard});
3877
3878 my $guest_agent = parse_guest_agent($conf);
3879
3880 if ($guest_agent->{enabled}) {
3881 my $qgasocket = PVE::QemuServer::Helpers::qmp_socket($vmid, 1);
3882 push @$devices, '-chardev', "socket,path=$qgasocket,server=on,wait=off,id=qga0";
3883
3884 if (!$guest_agent->{type} || $guest_agent->{type} eq 'virtio') {
3885 my $pciaddr = print_pci_addr("qga0", $bridges, $arch, $machine_type);
3886 push @$devices, '-device', "virtio-serial,id=qga0$pciaddr";
3887 push @$devices, '-device', 'virtserialport,chardev=qga0,name=org.qemu.guest_agent.0';
3888 } elsif ($guest_agent->{type} eq 'isa') {
3889 push @$devices, '-device', "isa-serial,chardev=qga0";
3890 }
3891 }
3892
3893 my $rng = $conf->{rng0} ? parse_rng($conf->{rng0}) : undef;
3894 if ($rng && $version_guard->(4, 1, 2)) {
3895 check_rng_source($rng->{source});
3896
3897 my $max_bytes = $rng->{max_bytes} // $rng_fmt->{max_bytes}->{default};
3898 my $period = $rng->{period} // $rng_fmt->{period}->{default};
3899 my $limiter_str = "";
3900 if ($max_bytes) {
3901 $limiter_str = ",max-bytes=$max_bytes,period=$period";
3902 }
3903
3904 my $rng_addr = print_pci_addr("rng0", $bridges, $arch, $machine_type);
3905 push @$devices, '-object', "rng-random,filename=$rng->{source},id=rng0";
3906 push @$devices, '-device', "virtio-rng-pci,rng=rng0$limiter_str$rng_addr";
3907 }
3908
3909 my $spice_port;
3910
3911 if ($qxlnum || $vga->{type} =~ /^virtio/) {
3912 if ($qxlnum > 1) {
3913 if ($winversion){
3914 for (my $i = 1; $i < $qxlnum; $i++){
3915 push @$devices, '-device', print_vga_device(
3916 $conf, $vga, $arch, $machine_version, $machine_type, $i, $qxlnum, $bridges);
3917 }
3918 } else {
3919 # assume other OS works like Linux
3920 my ($ram, $vram) = ("134217728", "67108864");
3921 if ($vga->{memory}) {
3922 $ram = PVE::Tools::convert_size($qxlnum*4*$vga->{memory}, 'mb' => 'b');
3923 $vram = PVE::Tools::convert_size($qxlnum*2*$vga->{memory}, 'mb' => 'b');
3924 }
3925 push @$cmd, '-global', "qxl-vga.ram_size=$ram";
3926 push @$cmd, '-global', "qxl-vga.vram_size=$vram";
3927 }
3928 }
3929
3930 my $pciaddr = print_pci_addr("spice", $bridges, $arch, $machine_type);
3931
3932 my $pfamily = PVE::Tools::get_host_address_family($nodename);
3933 my @nodeaddrs = PVE::Tools::getaddrinfo_all('localhost', family => $pfamily);
3934 die "failed to get an ip address of type $pfamily for 'localhost'\n" if !@nodeaddrs;
3935
3936 push @$devices, '-device', "virtio-serial,id=spice$pciaddr";
3937 push @$devices, '-chardev', "spicevmc,id=vdagent,name=vdagent";
3938 push @$devices, '-device', "virtserialport,chardev=vdagent,name=com.redhat.spice.0";
3939
3940 my $localhost = PVE::Network::addr_to_ip($nodeaddrs[0]->{addr});
3941 $spice_port = PVE::Tools::next_spice_port($pfamily, $localhost);
3942
3943 my $spice_enhancement_str = $conf->{spice_enhancements} // '';
3944 my $spice_enhancement = parse_property_string($spice_enhancements_fmt, $spice_enhancement_str);
3945 if ($spice_enhancement->{foldersharing}) {
3946 push @$devices, '-chardev', "spiceport,id=foldershare,name=org.spice-space.webdav.0";
3947 push @$devices, '-device', "virtserialport,chardev=foldershare,name=org.spice-space.webdav.0";
3948 }
3949
3950 my $spice_opts = "tls-port=${spice_port},addr=$localhost,tls-ciphers=HIGH,seamless-migration=on";
3951 $spice_opts .= ",streaming-video=$spice_enhancement->{videostreaming}"
3952 if $spice_enhancement->{videostreaming};
3953
3954 push @$devices, '-spice', "$spice_opts";
3955 }
3956
3957 # enable balloon by default, unless explicitly disabled
3958 if (!defined($conf->{balloon}) || $conf->{balloon}) {
3959 my $pciaddr = print_pci_addr("balloon0", $bridges, $arch, $machine_type);
3960 my $ballooncmd = "virtio-balloon-pci,id=balloon0$pciaddr";
3961 $ballooncmd .= ",free-page-reporting=on" if min_version($machine_version, 6, 2);
3962 push @$devices, '-device', $ballooncmd;
3963 }
3964
3965 if ($conf->{watchdog}) {
3966 my $wdopts = parse_watchdog($conf->{watchdog});
3967 my $pciaddr = print_pci_addr("watchdog", $bridges, $arch, $machine_type);
3968 my $watchdog = $wdopts->{model} || 'i6300esb';
3969 push @$devices, '-device', "$watchdog$pciaddr";
3970 push @$devices, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
3971 }
3972
3973 my $vollist = [];
3974 my $scsicontroller = {};
3975 my $ahcicontroller = {};
3976 my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : $defaults->{scsihw};
3977
3978 # Add iscsi initiator name if available
3979 if (my $initiator = get_initiator_name()) {
3980 push @$devices, '-iscsi', "initiator-name=$initiator";
3981 }
3982
3983 PVE::QemuConfig->foreach_volume($conf, sub {
3984 my ($ds, $drive) = @_;
3985
3986 if (PVE::Storage::parse_volume_id($drive->{file}, 1)) {
3987 check_volume_storage_type($storecfg, $drive->{file});
3988 push @$vollist, $drive->{file};
3989 }
3990
3991 # ignore efidisk here, already added in bios/fw handling code above
3992 return if $drive->{interface} eq 'efidisk';
3993 # similar for TPM
3994 return if $drive->{interface} eq 'tpmstate';
3995
3996 $use_virtio = 1 if $ds =~ m/^virtio/;
3997
3998 $drive->{bootindex} = $bootorder->{$ds} if $bootorder->{$ds};
3999
4000 if ($drive->{interface} eq 'virtio'){
4001 push @$cmd, '-object', "iothread,id=iothread-$ds" if $drive->{iothread};
4002 }
4003
4004 if ($drive->{interface} eq 'scsi') {
4005
4006 my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
4007
4008 die "scsi$drive->{index}: machine version 4.1~pve2 or higher is required to use more than 14 SCSI disks\n"
4009 if $drive->{index} > 13 && !&$version_guard(4, 1, 2);
4010
4011 my $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges, $arch, $machine_type);
4012 my $scsihw_type = $scsihw =~ m/^virtio-scsi-single/ ? "virtio-scsi-pci" : $scsihw;
4013
4014 my $iothread = '';
4015 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{iothread}){
4016 $iothread .= ",iothread=iothread-$controller_prefix$controller";
4017 push @$cmd, '-object', "iothread,id=iothread-$controller_prefix$controller";
4018 } elsif ($drive->{iothread}) {
4019 log_warn(
4020 "iothread is only valid with virtio disk or virtio-scsi-single controller, ignoring\n"
4021 );
4022 }
4023
4024 my $queues = '';
4025 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{queues}){
4026 $queues = ",num_queues=$drive->{queues}";
4027 }
4028
4029 push @$devices, '-device', "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues"
4030 if !$scsicontroller->{$controller};
4031 $scsicontroller->{$controller}=1;
4032 }
4033
4034 if ($drive->{interface} eq 'sata') {
4035 my $controller = int($drive->{index} / $PVE::QemuServer::Drive::MAX_SATA_DISKS);
4036 my $pciaddr = print_pci_addr("ahci$controller", $bridges, $arch, $machine_type);
4037 push @$devices, '-device', "ahci,id=ahci$controller,multifunction=on$pciaddr"
4038 if !$ahcicontroller->{$controller};
4039 $ahcicontroller->{$controller}=1;
4040 }
4041
4042 my $pbs_conf = $pbs_backing->{$ds};
4043 my $pbs_name = undef;
4044 if ($pbs_conf) {
4045 $pbs_name = "drive-$ds-pbs";
4046 push @$devices, '-blockdev', print_pbs_blockdev($pbs_conf, $pbs_name);
4047 }
4048
4049 my $drive_cmd = print_drive_commandline_full(
4050 $storecfg, $vmid, $drive, $pbs_name, min_version($kvmver, 6, 0));
4051
4052 # extra protection for templates, but SATA and IDE don't support it..
4053 $drive_cmd .= ',readonly=on' if drive_is_read_only($conf, $drive);
4054
4055 push @$devices, '-drive',$drive_cmd;
4056 push @$devices, '-device', print_drivedevice_full(
4057 $storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type);
4058 });
4059
4060 for (my $i = 0; $i < $MAX_NETS; $i++) {
4061 my $netname = "net$i";
4062
4063 next if !$conf->{$netname};
4064 my $d = parse_net($conf->{$netname});
4065 next if !$d;
4066 # save the MAC addr here (could be auto-gen. in some odd setups) for FDB registering later?
4067
4068 $use_virtio = 1 if $d->{model} eq 'virtio';
4069
4070 $d->{bootindex} = $bootorder->{$netname} if $bootorder->{$netname};
4071
4072 my $netdevfull = print_netdev_full($vmid, $conf, $arch, $d, $netname);
4073 push @$devices, '-netdev', $netdevfull;
4074
4075 my $netdevicefull = print_netdevice_full(
4076 $vmid, $conf, $d, $netname, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version);
4077
4078 push @$devices, '-device', $netdevicefull;
4079 }
4080
4081 if ($conf->{ivshmem}) {
4082 my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
4083
4084 my $bus;
4085 if ($q35) {
4086 $bus = print_pcie_addr("ivshmem");
4087 } else {
4088 $bus = print_pci_addr("ivshmem", $bridges, $arch, $machine_type);
4089 }
4090
4091 my $ivshmem_name = $ivshmem->{name} // $vmid;
4092 my $path = '/dev/shm/pve-shm-' . $ivshmem_name;
4093
4094 push @$devices, '-device', "ivshmem-plain,memdev=ivshmem$bus,";
4095 push @$devices, '-object', "memory-backend-file,id=ivshmem,share=on,mem-path=$path"
4096 .",size=$ivshmem->{size}M";
4097 }
4098
4099 # pci.4 is nested in pci.1
4100 $bridges->{1} = 1 if $bridges->{4};
4101
4102 if (!$q35) { # add pci bridges
4103 if (min_version($machine_version, 2, 3)) {
4104 $bridges->{1} = 1;
4105 $bridges->{2} = 1;
4106 }
4107 $bridges->{3} = 1 if $scsihw =~ m/^virtio-scsi-single/;
4108 }
4109
4110 for my $k (sort {$b cmp $a} keys %$bridges) {
4111 next if $q35 && $k < 4; # q35.cfg already includes bridges up to 3
4112
4113 my $k_name = $k;
4114 if ($k == 2 && $legacy_igd) {
4115 $k_name = "$k-igd";
4116 }
4117 my $pciaddr = print_pci_addr("pci.$k_name", undef, $arch, $machine_type);
4118 my $devstr = "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr";
4119
4120 if ($q35) { # add after -readconfig pve-q35.cfg
4121 splice @$devices, 2, 0, '-device', $devstr;
4122 } else {
4123 unshift @$devices, '-device', $devstr if $k > 0;
4124 }
4125 }
4126
4127 if (!$kvm) {
4128 push @$machineFlags, 'accel=tcg';
4129 }
4130
4131 push @$machineFlags, 'smm=off' if should_disable_smm($conf, $vga);
4132
4133 my $machine_type_min = $machine_type;
4134 if ($add_pve_version) {
4135 $machine_type_min =~ s/\+pve\d+$//;
4136 $machine_type_min .= "+pve$required_pve_version";
4137 }
4138 push @$machineFlags, "type=${machine_type_min}";
4139
4140 push @$cmd, @$devices;
4141 push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
4142 push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
4143 push @$cmd, '-global', join(',', @$globalFlags) if scalar(@$globalFlags);
4144
4145 if (my $vmstate = $conf->{vmstate}) {
4146 my $statepath = PVE::Storage::path($storecfg, $vmstate);
4147 push @$vollist, $vmstate;
4148 push @$cmd, '-loadstate', $statepath;
4149 print "activating and using '$vmstate' as vmstate\n";
4150 }
4151
4152 if (PVE::QemuConfig->is_template($conf)) {
4153 # needed to workaround base volumes being read-only
4154 push @$cmd, '-snapshot';
4155 }
4156
4157 # add custom args
4158 if ($conf->{args}) {
4159 my $aa = PVE::Tools::split_args($conf->{args});
4160 push @$cmd, @$aa;
4161 }
4162
4163 return wantarray ? ($cmd, $vollist, $spice_port) : $cmd;
4164 }
4165
# Sanity-check the host-side source file of a VirtIO RNG device.
#
# Parameters:
#   $source - path of the entropy source on the host (e.g. '/dev/hwrng')
#
# Dies if $source does not exist, or if $source is '/dev/hwrng' while the hw_random sysfs
# entry 'rng_current' reads 'none'.
sub check_rng_source {
    my ($source) = @_;

    # mostly relevant for /dev/hwrng, but doesn't hurt to check others too
    die "cannot create VirtIO RNG device: source file '$source' doesn't exist\n"
        if ! -e $source;

    return if $source ne '/dev/hwrng';

    my $rng_current = '/sys/devices/virtual/misc/hw_random/rng_current';

    # Default to '' so the comparison below cannot emit an "uninitialized value" warning
    # should file_read_firstline() yield undef (assumed possible for a missing or empty
    # sysfs entry - confirm against PVE::Tools).
    my $current = file_read_firstline($rng_current) // '';

    if ($current eq 'none') {
        # Needs to abort, otherwise QEMU crashes on first rng access. Note that rng_current cannot
        # be changed to 'none' manually, so once the VM is past this point, it's no longer an issue.
        die "Cannot start VM with passed-through RNG device: '/dev/hwrng' exists, but"
            ." '$rng_current' is set to 'none'. Ensure that a compatible hardware-RNG is attached"
            ." to the host.\n";
    }

    return;
}
4182
# Ask the running VM $vmid for its SPICE port via 'query-spice'.
#
# Returns the reported 'tls-port' if it is set, otherwise the plain 'port'.
# Dies with "no spice port" if neither is reported.
sub spice_port {
    my ($vmid) = @_;

    my $spice_info = mon_cmd($vmid, 'query-spice');

    my $port = $spice_info->{'tls-port'} || $spice_info->{'port'};
    die "no spice port\n" if !$port;

    return $port;
}
4190
# Collect the IDs of the devices currently present in the running VM $vmid.
#
# Sources, queried in this order:
#   * 'query-pci'   - every PCI device with a 'qdev_id', descending into PCI bridges
#   * 'query-block' - block devices named 'drive-<id>' are recorded as '<id>'
#   * 'query-mice'  - records 'tablet' if a 'QEMU HID Tablet' is present
#   * 'qom-list' on /machine/peripheral - 'usbN' and 'usbredirdevN' entries
#
# Returns a hash reference mapping device ID => true value. For PCI bridges the value is
# 1 plus the number of devices directly behind the bridge.
sub vm_devices_list {
    my ($vmid) = @_;

    my $devices = {};

    my $res = mon_cmd($vmid, 'query-pci');
    my $devices_to_check = [];
    for my $pcibus (@$res) {
        push @$devices_to_check, @{$pcibus->{devices}};
    }

    # breadth-first walk through (possibly nested) PCI bridges
    while (@$devices_to_check) {
        my $to_check = [];
        for my $d (@$devices_to_check) {
            my $qdev_id = $d->{'qdev_id'};
            $devices->{$qdev_id} = 1 if $qdev_id;

            my $bridge = $d->{'pci_bridge'};
            next if !$bridge || !$bridge->{devices};

            # Only count children for bridges that actually have an ID; otherwise an empty or
            # missing qdev_id would create a bogus '' entry in the result.
            $devices->{$qdev_id} += scalar(@{$bridge->{devices}}) if $qdev_id;
            push @$to_check, @{$bridge->{devices}};
        }
        $devices_to_check = $to_check;
    }

    my $resblock = mon_cmd($vmid, 'query-block');
    for my $block (@$resblock) {
        if ($block->{device} =~ m/^drive-(\S+)/) {
            $devices->{$1} = 1;
        }
    }

    my $resmice = mon_cmd($vmid, 'query-mice');
    for my $mice (@$resmice) {
        if ($mice->{name} eq 'QEMU HID Tablet') {
            $devices->{tablet} = 1;
            last;
        }
    }

    # for usb devices there is no query-usb
    # but we can iterate over the entries in
    # qom-list path=/machine/peripheral
    my $resperipheral = mon_cmd($vmid, 'qom-list', path => '/machine/peripheral');
    for my $per (@$resperipheral) {
        if ($per->{name} =~ m/^usb(?:redirdev)?\d+$/) {
            $devices->{$per->{name}} = 1;
        }
    }

    return $devices;
}
4240
# Hot-plug a single device into the running VM $vmid.
#
# Parameters:
#   $storecfg     - storage configuration, passed on to the drive helpers
#   $conf         - VM configuration hash
#   $vmid         - ID of the running VM
#   $deviceid     - device to plug: 'tablet', 'keyboard', 'usbN', 'usbredirdevN', 'virtioN',
#                   'scsiN', 'virtioscsiN', 'scsihwN', 'netN' or (non-q35 only) 'pci.N'
#   $device       - parsed device configuration hash (drive, net or usb); not read for
#                   tablet, keyboard and PCI bridges
#   $arch         - forwarded to the PCI address / device string printers
#   $machine_type - forwarded to the PCI address / device string printers
#
# Returns 1 if the device is plugged (or was plugged already). Returns an empty/undef value
# if qemu_netdevadd() reports failure for a network device. Dies on any other failure and
# for device IDs that cannot be hot-plugged.
sub vm_deviceplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    my $devices_list = vm_devices_list($vmid);
    return 1 if defined($devices_list->{$deviceid});

    # add PCI bridge if we need it for the device
    qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid, $arch, $machine_type);

    if ($deviceid eq 'tablet') {
        qemu_deviceadd($vmid, print_tabletdevice_full($conf, $arch));
    } elsif ($deviceid eq 'keyboard') {
        qemu_deviceadd($vmid, print_keyboarddevice_full($conf, $arch));
    } elsif ($deviceid =~ m/^usbredirdev(\d+)$/) {
        my $id = $1;
        qemu_spice_usbredir_chardev_add($vmid, "usbredirchardev$id");
        qemu_deviceadd($vmid, PVE::QemuServer::USB::print_spice_usbdevice($id, "xhci", $id + 1));
    } elsif ($deviceid =~ m/^usb(\d+)$/) {
        qemu_deviceadd($vmid, PVE::QemuServer::USB::print_usbdevice_full($conf, $deviceid, $device, {}, $1 + 1));
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        qemu_iothread_add($vmid, $deviceid, $device);

        qemu_driveadd($storecfg, $vmid, $device);
        my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);

        # device_add is part of the guarded section (like in the scsi branch), so the drive
        # backend added above is removed again if the frontend cannot be added.
        eval {
            qemu_deviceadd($vmid, $devicefull);
            qemu_deviceaddverify($vmid, $deviceid);
        };
        if (my $err = $@) {
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : "lsi";
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $scsihw_type = $scsihw eq 'virtio-scsi-single' ? "virtio-scsi-pci" : $scsihw;

        my $devicefull = "$scsihw_type,id=$deviceid$pciaddr";

        # iothread and queue settings are only applied to 'virtioscsiN' controllers
        if ($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{iothread}) {
            qemu_iothread_add($vmid, $deviceid, $device);
            $devicefull .= ",iothread=iothread-$deviceid";
        }

        if ($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{queues}) {
            $devicefull .= ",num_queues=$device->{queues}";
        }

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        qemu_findorcreatescsihw($storecfg, $conf, $vmid, $device, $arch, $machine_type);
        qemu_driveadd($storecfg, $vmid, $device);

        my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);
        eval { qemu_deviceadd($vmid, $devicefull); };
        if (my $err = $@) {
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        return if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid);

        # the NIC device string is built from the machine type reported by qemu_machine_pxe(),
        # not from the $machine_type parameter
        my $net_machine_type = PVE::QemuServer::Machine::qemu_machine_pxe($vmid, $conf);
        my $machine_version = PVE::QemuServer::Machine::extract_version($net_machine_type);
        my $use_old_bios_files = undef;
        ($use_old_bios_files, $net_machine_type) = qemu_use_old_bios_files($net_machine_type);

        my $netdevicefull = print_netdevice_full(
            $vmid, $conf, $device, $deviceid, undef, $use_old_bios_files, $arch, $net_machine_type, $machine_version);

        # device_add is part of the guarded section, so the netdev backend added above is
        # removed again if the frontend cannot be added.
        eval {
            qemu_deviceadd($vmid, $netdevicefull);
            qemu_deviceaddverify($vmid, $deviceid);
            qemu_set_link_status($vmid, $deviceid, !$device->{link_down});
        };
        if (my $err = $@) {
            eval { qemu_netdevdel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif (!$q35 && $deviceid =~ m/^(pci\.)(\d+)$/) {
        my $bridgeid = $2;
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $devicefull = "pci-bridge,id=pci.$bridgeid,chassis_nr=$bridgeid$pciaddr";

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);
    } else {
        die "can't hotplug device '$deviceid'\n";
    }

    return 1;
}
4337
# fixme: this should raise exceptions on error!
#
# Hot-unplug the device $deviceid from the running VM $vmid.
#
# Returns 1 once the device is gone (or if it was not plugged in the first place). Dies if
# the device is one of the boot disks of $conf or if its ID is not of a known unpluggable
# kind; errors raised by the called helpers propagate.
sub vm_deviceunplug {
    my ($vmid, $conf, $deviceid) = @_;

    my $plugged = vm_devices_list($vmid);
    return 1 if !defined($plugged->{$deviceid});

    my $bootdisks = PVE::QemuServer::Drive::get_bootdisks($conf);
    die "can't unplug bootdisk '$deviceid'\n" if grep { $_ eq $deviceid } @$bootdisks;

    # shared by most branches: request removal, then wait until the device is really gone
    my $del_and_verify = sub {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
    };

    if ($deviceid eq 'tablet' || $deviceid eq 'keyboard' || $deviceid eq 'xhci') {
        # removal of these is not verified
        qemu_devicedel($vmid, $deviceid);
    } elsif ($deviceid =~ m/^usb(?:redirdev)?\d+$/) {
        $del_and_verify->();
    } elsif ($deviceid =~ m/^virtio\d+$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        $del_and_verify->();
        qemu_drivedel($vmid, $deviceid);
        qemu_iothread_del($vmid, $deviceid, $drive);
    } elsif ($deviceid =~ m/^(?:virtioscsi|scsihw)\d+$/) {
        $del_and_verify->();
    } elsif ($deviceid =~ m/^scsi\d+$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        $del_and_verify->();
        qemu_drivedel($vmid, $deviceid);
        qemu_deletescsihw($conf, $vmid, $deviceid);

        my $single_controller = $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
        qemu_iothread_del($vmid, "virtioscsi$drive->{index}", $drive) if $single_controller;
    } elsif ($deviceid =~ m/^net\d+$/) {
        $del_and_verify->();
        qemu_netdevdel($vmid, $deviceid);
    } else {
        die "can't unplug device '$deviceid'\n";
    }

    return 1;
}
4386
# Add a 'spicevmc' character device of type 'usbredir' with ID $id to the running VM $vmid
# via the 'chardev-add' monitor command.
sub qemu_spice_usbredir_chardev_add {
    my ($vmid, $id) = @_;

    my $backend = {
        type => 'spicevmc',
        data => { type => "usbredir" },
    };

    mon_cmd($vmid, "chardev-add", id => $id, backend => $backend);
}
4400
# Issue 'device_add' on the running VM $vmid.
#
# $devicefull is a device string as used on the command line ("<driver>,key=value,...").
# It is prefixed with "driver=" and then split on every '=' and ',' into the option hash
# handed to the monitor command.
sub qemu_deviceadd {
    my ($vmid, $devicefull) = @_;

    my %options = split(/[=,]/, "driver=" . $devicefull);

    mon_cmd($vmid, "device_add", %options);
}
4409
# Issue 'device_del' for $deviceid on the running VM $vmid and return the monitor result.
sub qemu_devicedel {
    my ($vmid, $deviceid) = @_;

    my $result = mon_cmd($vmid, "device_del", id => $deviceid);

    return $result;
}
4415
# Create the object "iothread-$deviceid" in the running VM $vmid, but only if $device has
# its 'iothread' option set and the object is not listed by vm_iothreads_list() yet.
sub qemu_iothread_add {
    my ($vmid, $deviceid, $device) = @_;

    if ($device->{iothread}) {
        my $object_id = "iothread-$deviceid";
        my $existing = vm_iothreads_list($vmid);
        qemu_objectadd($vmid, $object_id, "iothread") if !$existing->{$object_id};
    }
}
4424
# Remove the object "iothread-$deviceid" from the running VM $vmid, but only if $device has
# its 'iothread' option set and the object is listed by vm_iothreads_list().
sub qemu_iothread_del {
    my ($vmid, $deviceid, $device) = @_;

    if ($device->{iothread}) {
        my $object_id = "iothread-$deviceid";
        my $existing = vm_iothreads_list($vmid);
        qemu_objectdel($vmid, $object_id) if $existing->{$object_id};
    }
}
4433
# Issue 'object-add' on the running VM $vmid, creating an object of QOM type $qomtype with
# the ID $objectid. Always returns 1; monitor errors propagate.
sub qemu_objectadd {
    my ($vmid, $objectid, $qomtype) = @_;

    my @object_args = (
        id => $objectid,
        "qom-type" => $qomtype,
    );
    mon_cmd($vmid, "object-add", @object_args);

    return 1;
}
4441
# Issue 'object-del' for the object $objectid on the running VM $vmid.
# Always returns 1; monitor errors propagate.
sub qemu_objectdel {
    my ($vmid, $objectid) = @_;

    my @object_args = (id => $objectid);
    mon_cmd($vmid, "object-del", @object_args);

    return 1;
}
4449
# Add the drive backend for $device to the running VM $vmid through the human monitor
# command 'drive_add'.
#
# The last argument to print_drive_commandline_full() is whether the running QEMU is at
# least version 6.0 (named io_uring here - presumably it toggles io_uring usage; confirm
# in that helper).
#
# Returns 1 if the monitor output contains "OK", dies with the monitor output otherwise.
sub qemu_driveadd {
    my ($storecfg, $vmid, $device) = @_;

    my $running_version = get_running_qemu_version($vmid);
    my $use_io_uring = min_version($running_version, 6, 0);

    my $drive_spec = print_drive_commandline_full($storecfg, $vmid, $device, undef, $use_io_uring);
    # double every backslash before embedding the string in the quoted HMP argument
    $drive_spec =~ s/\\/\\\\/g;

    my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_add auto \"$drive_spec\"");

    # If the command succeeds qemu prints: "OK"
    die "adding drive failed: $output\n" if $output !~ m/OK/s;

    return 1;
}
4464
# Remove the drive backend "drive-$deviceid" from the running VM $vmid through the human
# monitor command 'drive_del'.
#
# Returns 1 if the monitor printed nothing (besides leading whitespace) or reported that
# the device was not found; dies with the monitor output otherwise.
sub qemu_drivedel {
    my ($vmid, $deviceid) = @_;

    my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_del drive-$deviceid");
    $output =~ s/^\s+//;

    # NB: device not found errors mean the drive was auto-deleted and we ignore the error
    my $removed = $output eq "" || $output =~ m/Device \'.*?\' not found/s;

    die "deleting drive $deviceid failed : $output\n" if !$removed;

    return 1;
}
4478
# Wait until $deviceid shows up in the device list of the running VM $vmid.
#
# The list is checked up to six times with a one second pause after each unsuccessful
# check. Returns 1 as soon as the device is listed, dies if it never appears.
sub qemu_deviceaddverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if defined($present->{$deviceid});
        sleep 1;
    }

    die "error on hotplug device '$deviceid'\n";
}
4490
4491
# Wait until $deviceid has disappeared from the device list of the running VM $vmid.
#
# The list is checked up to six times with a one second pause after each unsuccessful
# check. Returns 1 as soon as the device is gone, dies if it is still listed afterwards.
sub qemu_devicedelverify {
    my ($vmid, $deviceid) = @_;

    # need to verify that the device is correctly removed as device_del
    # is async and empty return is not reliable
    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if !defined($present->{$deviceid});
        sleep 1;
    }

    die "error on hot-unplugging device '$deviceid'\n";
}
4506
# Make sure the SCSI controller that scsihw_infos() assigns to the drive $device exists in
# the running VM $vmid, hot-plugging it if it is not in the device list yet.
# Always returns 1; errors from the called helpers propagate.
sub qemu_findorcreatescsihw {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    # the first returned value (max devices per controller) is not needed here
    my (undef, $controller, $controller_prefix) = scsihw_infos($conf, $device);
    my $controller_id = "$controller_prefix$controller";

    my $present = vm_devices_list($vmid);
    vm_deviceplug($storecfg, $conf, $vmid, $controller_id, $device, $arch, $machine_type)
        if !defined($present->{$controller_id});

    return 1;
}
4521
# Unplug the SCSI controller belonging to the drive option $opt (e.g. 'scsi3') of the
# running VM $vmid.
#
# With 'virtio-scsi-single' the controller "virtioscsi<index>" is unplugged directly.
# Otherwise the controller "scsihw<N>" is only unplugged if no plugged SCSI drive has an
# index below ($maxdev - 1) * ($controller + 1), with both values taken from scsihw_infos()
# - NOTE(review): presumably this is meant to detect drives still using the controller.
#
# Always returns 1; errors from the called helpers propagate.
sub qemu_deletescsihw {
    my ($conf, $vmid, $opt) = @_;

    my $device = parse_drive($opt, $conf->{$opt});

    my $single_controller = $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
    if ($single_controller) {
        vm_deviceunplug($vmid, $conf, "virtioscsi$device->{index}");
        return 1;
    }

    my ($maxdev, $controller) = scsihw_infos($conf, $device);
    my $index_limit = ($maxdev - 1) * ($controller + 1);

    my $plugged = vm_devices_list($vmid);
    for my $name (keys %{$plugged}) {
        next if !is_valid_drivename($name);

        my $drive = parse_drive($name, $conf->{$name});
        return 1 if $drive->{interface} eq 'scsi' && $drive->{index} < $index_limit;
    }

    vm_deviceunplug($vmid, $conf, "scsihw$controller");

    return 1;
}
4550
# Hot-plug the PCI bridge needed by the device ID $device, unless it is present already.
#
# Parameters:
#   $storecfg, $conf, $vmid - passed on to vm_deviceplug()
#   $device                 - ID of the device that is about to be plugged
#   $arch, $machine_type    - forwarded to print_pci_addr() and vm_deviceplug()
#
# Always returns 1; errors from the called helpers propagate.
sub qemu_add_pci_bridge {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my $bridges = {};

    # Called only to fill $bridges; print_pci_addr() is expected to record the bridge the
    # device sits behind there. The returned address string is not needed.
    print_pci_addr($device, $bridges, $arch, $machine_type);

    my $bridgeid;
    while (my ($k, $v) = each %$bridges) {
        $bridgeid = $k;
    }
    # no bridge recorded, or a bridge ID below 1: nothing to plug
    return 1 if !defined($bridgeid) || $bridgeid < 1;

    my $bridge = "pci.$bridgeid";
    my $devices_list = vm_devices_list($vmid);

    if (!defined($devices_list->{$bridge})) {
        # vm_deviceplug() takes ($storecfg, $conf, $vmid, $deviceid, $device, $arch,
        # $machine_type). A bridge has no device configuration, so pass undef explicitly to
        # keep $arch and $machine_type in their own slots.
        vm_deviceplug($storecfg, $conf, $vmid, $bridge, undef, $arch, $machine_type);
    }

    return 1;
}
4574
# Set the link state of the network device named $device in the running VM $vmid via the
# 'set_link' monitor command; a true $up means link up, a false one link down.
sub qemu_set_link_status {
    my ($vmid, $device, $up) = @_;

    my $link_state = $up ? JSON::true : JSON::false;

    mon_cmd($vmid, "set_link", name => $device, up => $link_state);
}
4581
# Add the network backend for $deviceid to the running VM $vmid via 'netdev_add'.
#
# The netdev string produced by print_netdev_full() is split on every '=' and ',' into the
# option hash; 'vhost' is converted to a JSON boolean and 'queues' to a number before the
# command is sent. Always returns 1; errors from the called helpers propagate.
sub qemu_netdevadd {
    my ($vmid, $conf, $arch, $device, $deviceid) = @_;

    my $netdev = print_netdev_full($vmid, $conf, $arch, $device, $deviceid, 1);
    my %options = split(/[=,]/, $netdev);

    $options{vhost} = JSON::boolean(PVE::JSONSchema::parse_boolean($options{vhost}))
        if defined($options{vhost});

    $options{queues} = $options{queues} + 0
        if defined($options{queues});

    mon_cmd($vmid, "netdev_add", %options);

    return 1;
}
4599
# Remove the network backend with ID $deviceid from the running VM $vmid via 'netdev_del'.
sub qemu_netdevdel {
    my ($vmid, $deviceid) = @_;

    my @netdev_args = (id => $deviceid);

    mon_cmd($vmid, "netdev_del", @netdev_args);
}
4605
# Replace the USB device $deviceid of the running VM $vmid with the one described by
# $device (a hash with at least 'host' and optionally 'usb3').
#
# Does nothing if $device is false. Otherwise the old device is unplugged, an 'xhci'
# controller is added if the device list has none, and the new device is plugged.
sub qemu_usb_hotplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    return if !$device;

    # remove the old one first
    vm_deviceunplug($vmid, $conf, $deviceid);

    # check if xhci controller is necessary and available
    my $plugged = vm_devices_list($vmid);
    unless ($plugged->{xhci}) {
        my $xhci_addr = print_pci_addr("xhci", undef, $arch, $machine_type);
        qemu_deviceadd($vmid, PVE::QemuServer::USB::print_qemu_xhci_controller($xhci_addr));
    }

    # print_usbdevice_full expects the parsed device
    my $parsed = parse_usb_device($device->{host});
    $parsed->{usb3} = $device->{usb3};

    # add the new one
    vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $parsed, $arch, $machine_type);
}
4629
# Bring the number of vCPUs of the running VM $vmid to $vcpus.
#
# Parameters:
#   $vmid  - ID of the running VM
#   $conf  - VM configuration; 'vcpus' is updated and written back after every single
#            successful plug/unplug of a CPU device
#   $vcpus - requested vCPU count; a false value means "all" (sockets * cores)
#
# Dies if more vCPUs than sockets * cores are requested, if the running vCPU count does
# not match the configuration before adding, or if unplugging is requested on a machine
# version older than 2.7. Raises a parameter exception for 'vcpus' if a CPU does not
# (dis)appear in time.
sub qemu_cpu_hotplug {
    my ($vmid, $conf, $vcpus) = @_;

    my $machine_type = PVE::QemuServer::Machine::get_current_qemu_machine($vmid);

    # 'smp' is the old style socket count and no longer used; 'sockets' takes precedence
    my $sockets = $conf->{sockets} || $conf->{smp} || 1;
    my $cores = $conf->{cores} || 1;
    my $maxcpus = $sockets * $cores;

    $vcpus ||= $maxcpus;

    die "you can't add more vcpus than maxcpus\n"
        if $vcpus > $maxcpus;

    my $currentvcpus = $conf->{vcpus} || $maxcpus;

    # Poll 'query-cpus-fast' once per second until it lists $expected CPUs and return that
    # list; raise a 'vcpus' parameter error once the retry counter exceeds $max_retry.
    my $wait_for_cpu_count = sub {
        my ($expected, $max_retry, $error) = @_;

        my $retry = 0;
        while (1) {
            my $running_cpus = mon_cmd($vmid, "query-cpus-fast");
            return $running_cpus if scalar(@{$running_cpus}) == $expected;
            raise_param_exc({ vcpus => $error }) if $retry > $max_retry;
            $retry++;
            sleep 1;
        }
    };

    # record the vCPU count that is actually running in the configuration file
    my $record_vcpus = sub {
        my ($running_cpus) = @_;
        $conf->{vcpus} = scalar(@{$running_cpus});
        PVE::QemuConfig->write_config($vmid, $conf);
    };

    if ($vcpus < $currentvcpus) {
        die "cpu hot-unplugging requires qemu version 2.7 or higher\n"
            if !PVE::QemuServer::Machine::machine_version($machine_type, 2, 7);

        for my $i (reverse($vcpus + 1 .. $currentvcpus)) {
            qemu_devicedel($vmid, "cpu$i");
            my $running_cpus = $wait_for_cpu_count->($i - 1, 5, "error unplugging cpu$i");
            # update conf after each successful cpu unplug
            $record_vcpus->($running_cpus);
        }

        return;
    }

    my $currentrunningvcpus = mon_cmd($vmid, "query-cpus-fast");
    die "vcpus in running vm does not match its configuration\n"
        if scalar(@{$currentrunningvcpus}) != $currentvcpus;

    if (PVE::QemuServer::Machine::machine_version($machine_type, 2, 7)) {
        for my $i ($currentvcpus + 1 .. $vcpus) {
            my $cpustr = print_cpu_device($conf, $i);
            qemu_deviceadd($vmid, $cpustr);

            my $running_cpus = $wait_for_cpu_count->($i, 10, "error hotplugging cpu$i");
            # update conf after each successful cpu hotplug
            $record_vcpus->($running_cpus);
        }
    } else {
        # older machine versions use the 'cpu-add' command with a zero-based index
        for my $i ($currentvcpus .. $vcpus - 1) {
            mon_cmd($vmid, "cpu-add", id => int($i));
        }
    }
}
4704
# Apply I/O throttling limits to the block device $deviceid of VM $vmid via the
# 'block_set_io_throttle' monitor command. Does nothing if the VM is not running.
#
# Positional parameters after ($vmid, $deviceid), in this order:
#   $bps, $bps_rd, $bps_wr, $iops, $iops_rd, $iops_wr,
#   $bps_max, $bps_rd_max, $bps_wr_max, $iops_max, $iops_rd_max, $iops_wr_max,
#   $bps_max_length, $bps_rd_max_length, $bps_wr_max_length,
#   $iops_max_length, $iops_rd_max_length, $iops_wr_max_length
#
# Every limit is passed through int() before it is sent.
sub qemu_block_set_io_throttle {
    my ($vmid, $deviceid, @limits) = @_;

    return if !check_running($vmid);

    # monitor parameter names, in the same order as the positional arguments
    my @limit_names = qw(
        bps bps_rd bps_wr
        iops iops_rd iops_wr
        bps_max bps_rd_max bps_wr_max
        iops_max iops_rd_max iops_wr_max
        bps_max_length bps_rd_max_length bps_wr_max_length
        iops_max_length iops_rd_max_length iops_wr_max_length
    );

    my @throttle_args = map { ($limit_names[$_] => int($limits[$_])) } 0 .. $#limit_names;

    mon_cmd($vmid, "block_set_io_throttle", device => $deviceid, @throttle_args);
}
4736
# Resize the volume $volid on the storage layer and, if VM $vmid is running, tell QEMU about
# the new size of the block device $deviceid.
#
# If PVE::Storage::volume_resize() returns a false value, the size sent to QEMU is 0.
# The size is rounded up to the next multiple of 1024 before 'block_resize' is issued
# (with a timeout parameter of 60).
sub qemu_block_resize {
    my ($vmid, $deviceid, $storecfg, $volid, $size) = @_;

    my $running = check_running($vmid);

    my $resized = PVE::Storage::volume_resize($storecfg, $volid, $size, $running);
    $size = 0 if !$resized;

    return if !$running;

    # round up to the next multiple of 1024
    my $remainder = $size % 1024;
    $size += 1024 - $remainder if $remainder;

    my @resize_args = (
        device => $deviceid,
        size => int($size),
        timeout => 60,
    );

    mon_cmd($vmid, "block_resize", @resize_args);
}
4757
# Create the snapshot $snap of the volume $volid.
#
# If VM $vmid is running and do_snapshots_with_qemu() agrees, the snapshot is taken by QEMU
# ('blockdev-snapshot-internal-sync' on $deviceid); otherwise by the storage layer.
sub qemu_volume_snapshot {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $running = check_running($vmid);
    my $via_qemu = $running && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    return mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap)
        if $via_qemu;

    return PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
}
4769
# Delete the snapshot $snap of the volume $volid.
#
# The volume only counts as "running" if VM $vmid is running AND the volume is referenced
# by a drive of the current VM configuration. In that case, and if
# do_snapshots_with_qemu() agrees, QEMU deletes the snapshot
# ('blockdev-snapshot-delete-internal-sync' on $deviceid); otherwise the storage layer
# does, and is told whether the volume counts as running.
sub qemu_volume_snapshot_delete {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $running = check_running($vmid);

    if ($running) {
        # re-evaluate: only a volume used by the current configuration is really in use
        $running = undef;

        my $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->foreach_volume($conf, sub {
            my ($ds, $drive) = @_;
            $running = 1 if $drive->{file} eq $volid;
        });
    }

    my $via_qemu = $running && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    return mon_cmd($vmid, 'blockdev-snapshot-delete-internal-sync', device => $deviceid, name => $snap)
        if $via_qemu;

    return PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $running);
}
4791
# Set the migration capabilities of the running VM $vmid.
#
# Every capability reported by 'query-migrate-capabilities' is explicitly switched on or
# off: 'auto-converge' and 'xbzrle' are enabled, 'dirty-bitmaps' is enabled if
# 'query-proxmox-support' reports the matching property ('pbs-dirty-bitmap-savevm' when
# $savevm is true, 'pbs-dirty-bitmap-migration' otherwise), everything else is disabled.
# A failing 'query-proxmox-support' is tolerated and treated as "not supported".
sub set_migration_caps {
    my ($vmid, $savevm) = @_;

    my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };

    my $bitmap_prop = $savevm ? 'pbs-dirty-bitmap-savevm' : 'pbs-dirty-bitmap-migration';

    my $enabled_cap = {
        "auto-converge" => 1,
        "xbzrle" => 1,
        "x-rdma-pin-all" => 0,
        "zero-blocks" => 0,
        "compress" => 0,
        "dirty-bitmaps" => $qemu_support->{$bitmap_prop} ? 1 : 0,
    };

    my $supported_capabilities = mon_cmd($vmid, "query-migrate-capabilities");

    my @capabilities = map {
        my $name = $_->{capability};
        +{
            capability => $name,
            state => $enabled_cap->{$name} ? JSON::true : JSON::false,
        };
    } @$supported_capabilities;

    mon_cmd($vmid, "migrate-set-capabilities", capabilities => \@capabilities);
}
4822
# Call $func once for every distinct volume ID referenced by $conf or any of its snapshots.
#
# The current configuration and every entry of $conf->{snapshots} are scanned (including
# the 'vmstate' key and unused volumes). $func is invoked as
#   $func->($volid, $attributes, @param)
# where $attributes is a hash with the keys:
#   cdrom                  - 1 only if every reference to the volume is a CD-ROM drive
#   replicate              - 1 if any reference has replication enabled (default: enabled)
#   shared                 - 1 if any reference is marked shared
#   referenced_in_config   - 1 if referenced by the current configuration
#   referenced_in_snapshot - hash of snapshot names referencing the volume (if any)
#   size                   - first true 'size' seen for the volume (if any)
#   is_vmstate, is_tpmstate, is_unused - 1 if referenced under 'vmstate', 'tpmstate0' or
#                            an 'unusedN' key respectively
#   drivename              - last valid drive name referencing the volume (if any)
sub foreach_volid {
    my ($conf, $func, @param) = @_;

    my $volhash = {};

    my $record_volume = sub {
        my ($key, $drive, $snapname) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $info = ($volhash->{$volid} //= {});

        $info->{cdrom} //= 1;
        $info->{cdrom} = 0 if !drive_is_cdrom($drive);

        $info->{replicate} //= 0;
        $info->{replicate} = 1 if $drive->{replicate} // 1;

        $info->{shared} //= 0;
        $info->{shared} = 1 if $drive->{shared};

        $info->{referenced_in_config} //= 0;
        if (defined($snapname)) {
            $info->{referenced_in_snapshot}->{$snapname} = 1;
        } else {
            $info->{referenced_in_config} = 1;
        }

        my $size = $drive->{size};
        $info->{size} //= $size if $size;

        my @key_flags = (
            [ is_vmstate => $key eq 'vmstate' ],
            [ is_tpmstate => $key eq 'tpmstate0' ],
            [ is_unused => scalar($key =~ /^unused\d+$/) ],
        );
        for my $key_flag (@key_flags) {
            my ($flag, $matches) = @$key_flag;
            $info->{$flag} //= 0;
            $info->{$flag} = 1 if $matches;
        }

        $info->{drivename} = $key if is_valid_drivename($key);
    };

    my $include_opts = {
        extra_keys => ['vmstate'],
        include_unused => 1,
    };

    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $record_volume);
    for my $snapname (keys %{$conf->{snapshots}}) {
        my $snap = $conf->{snapshots}->{$snapname};
        PVE::QemuConfig->foreach_volume_full($snap, $include_opts, $record_volume, $snapname);
    }

    for my $volid (keys %$volhash) {
        $func->($volid, $volhash->{$volid}, @param);
    }
}
4880
# Options that are taken over from the pending section directly when hot-plugging pending
# changes. Every key of $confdesc_cloudinit is treated the same way.
my $fast_plug_option = {
    map { ($_ => 1) } qw(
        lock
        name
        onboot
        shares
        startup
        description
        protection
        vmstatestorage
        hookscript
        tags
    ),
};

$fast_plug_option->{$_} = 1 for keys %$confdesc_cloudinit;
4897
4898 # hotplug changes in [PENDING]
4899 # $selection hash can be used to only apply specified options, for
4900 # example: { cores => 1 } (only apply changed 'cores')
4901 # $errors ref is used to return error messages
4902 sub vmconfig_hotplug_pending {
4903 my ($vmid, $conf, $storecfg, $selection, $errors) = @_;
4904
4905 my $defaults = load_defaults();
4906 my $arch = get_vm_arch($conf);
4907 my $machine_type = get_vm_machine($conf, undef, $arch);
4908
4909 # commit values which do not have any impact on running VM first
4910 # Note: those option cannot raise errors, we we do not care about
4911 # $selection and always apply them.
4912
4913 my $add_error = sub {
4914 my ($opt, $msg) = @_;
4915 $errors->{$opt} = "hotplug problem - $msg";
4916 };
4917
4918 my $cloudinit_pending_properties = PVE::QemuServer::cloudinit_pending_properties();
4919
4920 my $cloudinit_record_changed = sub {
4921 my ($conf, $opt, $old, $new) = @_;
4922 return if !$cloudinit_pending_properties->{$opt};
4923
4924 my $ci = ($conf->{cloudinit} //= {});
4925
4926 my $recorded = $ci->{$opt};
4927 my %added = map { $_ => 1 } PVE::Tools::split_list(delete($ci->{added}) // '');
4928
4929 if (defined($new)) {
4930 if (defined($old)) {
4931 # an existing value is being modified
4932 if (defined($recorded)) {
4933 # the value was already not in sync
4934 if ($new eq $recorded) {
4935 # a value is being reverted to the cloud-init state:
4936 delete $ci->{$opt};
4937 delete $added{$opt};
4938 } else {
4939 # the value was changed multiple times, do nothing
4940 }
4941 } elsif ($added{$opt}) {
4942 # the value had been marked as added and is being changed, do nothing
4943 } else {
4944 # the value is new, record it:
4945 $ci->{$opt} = $old;
4946 }
4947 } else {
4948 # a new value is being added
4949 if (defined($recorded)) {
4950 # it was already not in sync
4951 if ($new eq $recorded) {
4952 # a value is being reverted to the cloud-init state:
4953 delete $ci->{$opt};
4954 delete $added{$opt};
4955 } else {
4956 # the value had temporarily been removed, do nothing
4957 }
4958 } elsif ($added{$opt}) {
4959 # the value had been marked as added already, do nothing
4960 } else {
4961 # the value is new, add it
4962 $added{$opt} = 1;
4963 }
4964 }
4965 } elsif (!defined($old)) {
4966 # a non-existent value is being removed? ignore...
4967 } else {
4968 # a value is being deleted
4969 if (defined($recorded)) {
4970 # a value was already recorded, just keep it
4971 } elsif ($added{$opt}) {
4972 # the value was marked as added, remove it
4973 delete $added{$opt};
4974 } else {
4975 # a previously unrecorded value is being removed, record the old value:
4976 $ci->{$opt} = $old;
4977 }
4978 }
4979
4980 my $added = join(',', sort keys %added);
4981 $ci->{added} = $added if length($added);
4982 };
4983
4984 my $changes = 0;
4985 foreach my $opt (keys %{$conf->{pending}}) { # add/change
4986 if ($fast_plug_option->{$opt}) {
4987 my $new = delete $conf->{pending}->{$opt};
4988 $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $new);
4989 $conf->{$opt} = $new;
4990 $changes = 1;
4991 }
4992 }
4993
4994 if ($changes) {
4995 PVE::QemuConfig->write_config($vmid, $conf);
4996 }
4997
4998 my $ostype = $conf->{ostype};
4999 my $version = extract_version($machine_type, get_running_qemu_version($vmid));
5000 my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
5001 my $usb_hotplug = $hotplug_features->{usb}
5002 && min_version($version, 7, 1)
5003 && defined($ostype) && ($ostype eq 'l26' || windows_version($ostype) > 7);
5004
5005 my $cgroup = PVE::QemuServer::CGroup->new($vmid);
5006 my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
5007
5008 foreach my $opt (sort keys %$pending_delete_hash) {
5009 next if $selection && !$selection->{$opt};
5010 my $force = $pending_delete_hash->{$opt}->{force};
5011 eval {
5012 if ($opt eq 'hotplug') {
5013 die "skip\n" if ($conf->{hotplug} =~ /memory/);
5014 } elsif ($opt eq 'tablet') {
5015 die "skip\n" if !$hotplug_features->{usb};
5016 if ($defaults->{tablet}) {
5017 vm_deviceplug($storecfg, $conf, $vmid, 'tablet', $arch, $machine_type);
5018 vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', $arch, $machine_type)
5019 if $arch eq 'aarch64';
5020 } else {
5021 vm_deviceunplug($vmid, $conf, 'tablet');
5022 vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
5023 }
5024 } elsif ($opt =~ m/^usb(\d+)$/) {
5025 my $index = $1;
5026 die "skip\n" if !$usb_hotplug;
5027 vm_deviceunplug($vmid, $conf, "usbredirdev$index"); # if it's a spice port
5028 vm_deviceunplug($vmid, $conf, $opt);
5029 } elsif ($opt eq 'vcpus') {
5030 die "skip\n" if !$hotplug_features->{cpu};
5031 qemu_cpu_hotplug($vmid, $conf, undef);
5032 } elsif ($opt eq 'balloon') {
5033 # enable balloon device is not hotpluggable
5034 die "skip\n" if defined($conf->{balloon}) && $conf->{balloon} == 0;
5035 # here we reset the ballooning value to memory
5036 my $balloon = $conf->{memory} || $defaults->{memory};
5037 mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
5038 } elsif ($fast_plug_option->{$opt}) {
5039 # do nothing
5040 } elsif ($opt =~ m/^net(\d+)$/) {
5041 die "skip\n" if !$hotplug_features->{network};
5042 vm_deviceunplug($vmid, $conf, $opt);
5043 } elsif (is_valid_drivename($opt)) {
5044 die "skip\n" if !$hotplug_features->{disk} || $opt =~ m/(ide|sata)(\d+)/;
5045 vm_deviceunplug($vmid, $conf, $opt);
5046 vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
5047 } elsif ($opt =~ m/^memory$/) {
5048 die "skip\n" if !$hotplug_features->{memory};
5049 PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt);
5050 } elsif ($opt eq 'cpuunits') {
5051 $cgroup->change_cpu_shares(undef);
5052 } elsif ($opt eq 'cpulimit') {
5053 $cgroup->change_cpu_quota(undef, undef); # reset, cgroup module can better decide values
5054 } else {
5055 die "skip\n";
5056 }
5057 };
5058 if (my $err = $@) {
5059 &$add_error($opt, $err) if $err ne "skip\n";
5060 } else {
5061 my $old = delete $conf->{$opt};
5062 $cloudinit_record_changed->($conf, $opt, $old, undef);
5063 PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
5064 }
5065 }
5066
5067 my $cloudinit_opt;
5068 foreach my $opt (keys %{$conf->{pending}}) {
5069 next if $selection && !$selection->{$opt};
5070 my $value = $conf->{pending}->{$opt};
5071 eval {
5072 if ($opt eq 'hotplug') {
5073 die "skip\n" if ($value =~ /memory/) || ($value !~ /memory/ && $conf->{hotplug} =~ /memory/);
5074 } elsif ($opt eq 'tablet') {
5075 die "skip\n" if !$hotplug_features->{usb};
5076 if ($value == 1) {
5077 vm_deviceplug($storecfg, $conf, $vmid, 'tablet', $arch, $machine_type);
5078 vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', $arch, $machine_type)
5079 if $arch eq 'aarch64';
5080 } elsif ($value == 0) {
5081 vm_deviceunplug($vmid, $conf, 'tablet');
5082 vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
5083 }
5084 } elsif ($opt =~ m/^usb(\d+)$/) {
5085 my $index = $1;
5086 die "skip\n" if !$usb_hotplug;
5087 my $d = eval { parse_property_string($usbdesc->{format}, $value) };
5088 my $id = $opt;
5089 if ($d->{host} eq 'spice') {
5090 $id = "usbredirdev$index";
5091 }
5092 qemu_usb_hotplug($storecfg, $conf, $vmid, $id, $d, $arch, $machine_type);
5093 } elsif ($opt eq 'vcpus') {
5094 die "skip\n" if !$hotplug_features->{cpu};
5095 qemu_cpu_hotplug($vmid, $conf, $value);
5096 } elsif ($opt eq 'balloon') {
5097 # enable/disable balloning device is not hotpluggable
5098 my $old_balloon_enabled = !!(!defined($conf->{balloon}) || $conf->{balloon});
5099 my $new_balloon_enabled = !!(!defined($conf->{pending}->{balloon}) || $conf->{pending}->{balloon});
5100 die "skip\n" if $old_balloon_enabled != $new_balloon_enabled;
5101
5102 # allow manual ballooning if shares is set to zero
5103 if ((defined($conf->{shares}) && ($conf->{shares} == 0))) {
5104 my $balloon = $conf->{pending}->{balloon} || $conf->{memory} || $defaults->{memory};
5105 mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
5106 }
5107 } elsif ($opt =~ m/^net(\d+)$/) {
5108 # some changes can be done without hotplug
5109 vmconfig_update_net($storecfg, $conf, $hotplug_features->{network},
5110 $vmid, $opt, $value, $arch, $machine_type);
5111 } elsif (is_valid_drivename($opt)) {
5112 die "skip\n" if $opt eq 'efidisk0' || $opt eq 'tpmstate0';
5113 # some changes can be done without hotplug
5114 my $drive = parse_drive($opt, $value);
5115 if (drive_is_cloudinit($drive)) {
5116 $cloudinit_opt = [$opt, $drive];
5117 # apply all the other changes first, then generate the cloudinit disk
5118 die "skip\n";
5119 }
5120 vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
5121 $vmid, $opt, $value, $arch, $machine_type);
5122 } elsif ($opt =~ m/^memory$/) { #dimms
5123 die "skip\n" if !$hotplug_features->{memory};
5124 $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt, $value);
5125 } elsif ($opt eq 'cpuunits') {
5126 my $new_cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{pending}->{$opt}); #clamp
5127 $cgroup->change_cpu_shares($new_cpuunits);
5128 } elsif ($opt eq 'cpulimit') {
5129 my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000);
5130 $cgroup->change_cpu_quota($cpulimit, 100000);
5131 } elsif ($opt eq 'agent') {
5132 vmconfig_update_agent($conf, $opt, $value);
5133 } else {
5134 die "skip\n"; # skip non-hot-pluggable options
5135 }
5136 };
5137 if (my $err = $@) {
5138 &$add_error($opt, $err) if $err ne "skip\n";
5139 } else {
5140 $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $value);
5141 $conf->{$opt} = $value;
5142 delete $conf->{pending}->{$opt};
5143 }
5144 }
5145
5146 if (defined($cloudinit_opt)) {
5147 my ($opt, $drive) = @$cloudinit_opt;
5148 my $value = $conf->{pending}->{$opt};
5149 eval {
5150 my $temp = {%$conf, $opt => $value};
5151 PVE::QemuServer::Cloudinit::apply_cloudinit_config($temp, $vmid);
5152 vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
5153 $vmid, $opt, $value, $arch, $machine_type);
5154 };
5155 if (my $err = $@) {
5156 &$add_error($opt, $err) if $err ne "skip\n";
5157 } else {
5158 $conf->{$opt} = $value;
5159 delete $conf->{pending}->{$opt};
5160 }
5161 }
5162
5163 # unplug xhci controller if no usb device is left
5164 if ($usb_hotplug) {
5165 my $has_usb = 0;
5166 for (my $i = 0; $i < $MAX_USB_DEVICES; $i++) {
5167 next if !defined($conf->{"usb$i"});
5168 $has_usb = 1;
5169 last;
5170 }
5171 if (!$has_usb) {
5172 vm_deviceunplug($vmid, $conf, 'xhci');
5173 }
5174 }
5175
5176 PVE::QemuConfig->write_config($vmid, $conf);
5177
5178 if ($hotplug_features->{cloudinit} && PVE::QemuServer::Cloudinit::has_changes($conf)) {
5179 PVE::QemuServer::vmconfig_update_cloudinit_drive($storecfg, $conf, $vmid);
5180 }
5181 }
5182
# Frees the volume behind $drive if the config key allows it.
#
# Nothing happens (empty return) unless $force is set or $key is an 'unused*'
# entry, and drive_is_cdrom($drive, 1) is false for the drive.
#
# For a volume owned by this VM the caller's 'Datastore.AllocateSpace'
# privilege on the storage is checked via $rpcenv, then the volume is freed.
# Dies if the volume is still reported as in use. A volume not owned by the VM
# is left alone on the storage.
#
# Returns 1 in both the "freed" and the "not owner" case, so the caller can
# drop the entry from the config.
sub try_deallocate_drive {
    my ($storecfg, $vmid, $conf, $key, $drive, $rpcenv, $authuser, $force) = @_;

    my $eligible = $force || $key =~ /^unused/;
    return if !$eligible || drive_is_cdrom($drive, 1);

    my $volid = $drive->{file};

    # If vm is not owner of this disk remove from config
    return 1 if !vm_is_volid_owner($storecfg, $vmid, $volid);

    # scalar context on purpose: only the storage ID is needed here
    my $sid = PVE::Storage::parse_volume_id($volid);
    $rpcenv->check($authuser, "/storage/$sid", ['Datastore.AllocateSpace']);

    # check if the disk is really unused
    die "unable to delete '$volid' - volume is still in use (snapshot?)\n"
        if PVE::QemuServer::Drive::is_volume_in_use($storecfg, $conf, $key, $volid);

    PVE::Storage::vdisk_free($storecfg, $volid);
    return 1;
}
5205
# Handles removal of drive $opt from $conf.
#
# Without $force the drive is handed to vmconfig_register_unused_drive().
# With $force the current user needs 'VM.Config.Disk' on the VM and the volume
# is passed on to try_deallocate_drive().
#
# The RPC environment and user are looked up in both cases.
sub vmconfig_delete_or_detach_drive {
    my ($vmid, $storecfg, $conf, $opt, $force) = @_;

    my $drive = parse_drive($opt, $conf->{$opt});

    my $rpcenv = PVE::RPCEnvironment::get();
    my $authuser = $rpcenv->get_user();

    # detach only
    return vmconfig_register_unused_drive($storecfg, $vmid, $conf, $drive) if !$force;

    # forced removal: needs disk config permission before deallocating
    $rpcenv->check_vm_perm($authuser, $vmid, undef, ['VM.Config.Disk']);
    return try_deallocate_drive($storecfg, $vmid, $conf, $opt, $drive, $rpcenv, $authuser, $force);
}
5221
5222
5223
# Applies all pending changes of $conf to the config itself (cold plug).
#
# Parameters:
#   $vmid, $conf, $storecfg - guest ID, its config and the storage config
#   $errors          - hash ref; failures are stored as $errors->{$opt} and
#                      additionally emitted via warn
#   $skip_cloud_init - if true, the cloud-init config is not regenerated here
#
# Pending deletions are handled first, then pending additions/changes. An
# option whose handling fails stays pending. The config is written once after
# both passes, and a second time if apply_cloudinit_config() reports success.
#
# Returns early (doing nothing) if there are no pending entries.
sub vmconfig_apply_pending {
    my ($vmid, $conf, $storecfg, $errors, $skip_cloud_init) = @_;

    return if !scalar(keys %{$conf->{pending}});

    my $record_failure = sub {
        my ($opt, $msg) = @_;
        my $err_msg = "unable to apply pending change $opt : $msg";
        $errors->{$opt} = $err_msg;
        warn $err_msg;
    };

    # cold plug

    # pass 1: pending deletions
    my $to_delete = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    for my $opt (sort keys %$to_delete) {
        my $force = $to_delete->{$opt}->{force};
        eval {
            die "internal error" if $opt =~ m/^unused/;
            vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force)
                if defined($conf->{$opt}) && is_valid_drivename($opt);
        };
        if (my $err = $@) {
            $record_failure->($opt, $err);
            next;
        }
        PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        delete $conf->{$opt};
    }

    PVE::QemuConfig->cleanup_pending($conf);

    # undef = "decide below", 0 = "never"; only an undef value can be turned into 1
    my $generate_cloudinit;
    $generate_cloudinit = 0 if $skip_cloud_init;

    # pass 2: pending additions/changes
    for my $opt (keys %{$conf->{pending}}) {
        next if $opt eq 'delete'; # just to be sure

        eval {
            # a drive that gets replaced is handed to vmconfig_register_unused_drive first
            vmconfig_register_unused_drive($storecfg, $vmid, $conf, parse_drive($opt, $conf->{$opt}))
                if defined($conf->{$opt}) && is_valid_drivename($opt);
        };
        if (my $err = $@) {
            $record_failure->($opt, $err);
            next;
        }

        if (is_valid_drivename($opt)) {
            my $pending_drive = parse_drive($opt, $conf->{pending}->{$opt});
            $generate_cloudinit //= 1 if drive_is_cloudinit($pending_drive);
        }

        $conf->{$opt} = delete $conf->{pending}->{$opt};
    }

    # write all changes at once to avoid unnecessary i/o
    PVE::QemuConfig->write_config($vmid, $conf);
    if ($generate_cloudinit) {
        if (PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid)) {
            # After successful generation and if there were changes to be applied, update the
            # config to drop the {cloudinit} entry.
            PVE::QemuConfig->write_config($vmid, $conf);
        }
    }
}
5290
# Applies the new value $value of network device $opt ('netN') to a running VM.
#
# If the device already exists in $conf and neither model, MAC address nor
# queue count changed (and both old and new config have a bridge), the change
# is applied to the existing tap interface and 1 is returned:
#   - bridge, tag, trunks or firewall changed: the tap is unplugged and plugged
#     again with the new settings (rate included)
#   - only the rate changed: just the rate limit is updated
#   - link_down changed: the link status is set via qemu_set_link_status()
#
# Otherwise the old device (if any) is hot-unplugged and the new one is
# hot-plugged. Dies with "skip\n" if that would be needed but $hotplug is false.
sub vmconfig_update_net {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $newnet = parse_net($value);

    if ($conf->{$opt}) {
        my $oldnet = parse_net($conf->{$opt});

        my $needs_replug =
            safe_string_ne($oldnet->{model}, $newnet->{model})
            || safe_string_ne($oldnet->{macaddr}, $newnet->{macaddr})
            || safe_num_ne($oldnet->{queues}, $newnet->{queues})
            || !($newnet->{bridge} && $oldnet->{bridge}); # bridge/nat mode change

        if (!$needs_replug) {
            die "internal error" if $opt !~ m/net(\d+)/;
            my $iface = "tap${vmid}i$1";

            my $tap_changed =
                safe_string_ne($oldnet->{bridge}, $newnet->{bridge})
                || safe_num_ne($oldnet->{tag}, $newnet->{tag})
                || safe_string_ne($oldnet->{trunks}, $newnet->{trunks})
                || safe_num_ne($oldnet->{firewall}, $newnet->{firewall});

            if ($tap_changed) {
                PVE::Network::tap_unplug($iface);

                my ($bridge, $tag, $firewall, $trunks, $rate) =
                    @{$newnet}{qw(bridge tag firewall trunks rate)};

                if ($have_sdn) {
                    PVE::Network::SDN::Zones::tap_plug($iface, $bridge, $tag, $firewall, $trunks, $rate);
                } else {
                    PVE::Network::tap_plug($iface, $bridge, $tag, $firewall, $trunks, $rate);
                }
            } elsif (safe_num_ne($oldnet->{rate}, $newnet->{rate})) {
                # Rate can be applied on its own but any change above needs to
                # include the rate in tap_plug since OVS resets everything.
                PVE::Network::tap_rate_limit($iface, $newnet->{rate});
            }

            qemu_set_link_status($vmid, $opt, !$newnet->{link_down})
                if safe_string_ne($oldnet->{link_down}, $newnet->{link_down});

            return 1;
        }

        # for non online change, we try to hot-unplug
        die "skip\n" if !$hotplug;
        vm_deviceunplug($vmid, $conf, $opt);
    }

    die "skip\n" if !$hotplug;
    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet, $arch, $machine_type);
}
5343
# Decides whether a pending change of the guest agent option can be applied
# without restarting the VM.
#
# Dies with "skip\n" if the option is not currently set in $conf, or if any
# sub-option other than 'fstrim_cloned_disks' differs between the current and
# the new value (in either direction, so added, changed and removed
# sub-options are all covered). Returns nothing otherwise.
sub vmconfig_update_agent {
    my ($conf, $opt, $value) = @_;

    die "skip\n" if !$conf->{$opt};

    # sub-options that may change while the VM is running
    my %hotpluggable = (fstrim_cloned_disks => 1);

    my $current = parse_guest_agent($conf);
    my $wanted = parse_guest_agent({$opt => $value});

    # true if a non-hotpluggable key of $from has a different value in $to
    my $cold_change = sub {
        my ($from, $to) = @_;
        for my $option (keys %$from) {
            next if $hotpluggable{$option};
            return 1 if safe_string_ne($from->{$option}, $to->{$option});
        }
        return 0;
    };

    die "skip\n" if $cold_change->($wanted, $current); # added/changed options
    die "skip\n" if $cold_change->($current, $wanted); # removed options

    return; # either no actual change (e.g., format string reordered) or just hotpluggable changes
}
5366
# Applies the new value $value of drive $opt to a running VM.
#
# If $opt already holds a parseable drive in $conf:
#   - changing the media type is refused
#   - CD-ROM: the medium is ejected and, unless the new file is 'none', the new
#     image is inserted; returns 1
#   - disk, same volume: dies with "skip\n" if discard, iothread, queues, cache,
#     ssd or ro changed; otherwise updates the I/O throttle if any throttle
#     setting changed; returns 1
#   - disk, different volume: requires $hotplug, unplugs the old device, hands
#     the old drive to vmconfig_register_unused_drive() and continues with the
#     hotplug below
#
# New disks are hot-plugged. Dies with "skip\n" if $hotplug is false or $opt
# matches an ide/sata slot.
sub vmconfig_update_disk {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $drive = parse_drive($opt, $value);

    if ($conf->{$opt} && (my $old_drive = parse_drive($opt, $conf->{$opt}))) {
        my $media = $drive->{media} || 'disk';
        my $oldmedia = $old_drive->{media} || 'disk';
        die "unable to change media type\n" if $media ne $oldmedia;

        if (drive_is_cdrom($old_drive)) {
            if ($drive->{file} ne 'none') {
                # resolve the path first, so a failure happens before ejecting
                my $path = get_iso_path($storecfg, $vmid, $drive->{file});

                # force eject if locked
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");

                mon_cmd($vmid, "blockdev-change-medium", id => "$opt", filename => "$path")
                    if $path;
            } else {
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive)
                    if drive_is_cloudinit($old_drive);
            }

            return 1;
        }

        if ($drive->{file} eq $old_drive->{file}) {
            # update existing disk

            # skip non hotpluggable value
            my @cold_only = qw(discard iothread queues cache ssd ro);
            die "skip\n"
                if first { safe_string_ne($drive->{$_}, $old_drive->{$_}) } @cold_only;

            # apply throttle
            my @rate_keys = qw(mbps mbps_rd mbps_wr);
            my @iops_keys = qw(iops iops_rd iops_wr);
            my @rate_max_keys = qw(mbps_max mbps_rd_max mbps_wr_max);
            my @iops_max_keys = qw(iops_max iops_rd_max iops_wr_max);
            my @length_keys = qw(
                bps_max_length bps_rd_max_length bps_wr_max_length
                iops_max_length iops_rd_max_length iops_wr_max_length
            );

            my @throttle_keys =
                (@rate_keys, @iops_keys, @rate_max_keys, @iops_max_keys, @length_keys);

            if (first { safe_num_ne($drive->{$_}, $old_drive->{$_}) } @throttle_keys) {
                # mbps* values are scaled by 1024*1024, unset *_length values default to 1
                qemu_block_set_io_throttle(
                    $vmid, "drive-$opt",
                    (map { ($drive->{$_} || 0)*1024*1024 } @rate_keys),
                    (map { $drive->{$_} || 0 } @iops_keys),
                    (map { ($drive->{$_} || 0)*1024*1024 } @rate_max_keys),
                    (map { $drive->{$_} || 0 } @iops_max_keys),
                    (map { $drive->{$_} || 1 } @length_keys),
                );
            }

            return 1;
        }

        # a different volume is assigned to this slot
        die "skip\n" if !$hotplug;

        # unplug and register as unused
        vm_deviceunplug($vmid, $conf, $opt);
        vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
    }

    die "skip\n" if !$hotplug || $opt =~ m/(ide|sata)(\d+)/;

    # hotplug new disks
    PVE::Storage::activate_volumes($storecfg, [$drive->{file}]) if $drive->{file} !~ m|^/dev/.+|;
    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive, $arch, $machine_type);
}
5475
# Regenerates the cloud-init config and, if the VM is running, swaps the
# medium of its cloud-init drive for the regenerated one.
#
# Does nothing if $conf has no cloud-init drive. If several volumes qualify,
# the last one visited by foreach_volume() is used. The config is written when
# apply_cloudinit_config() returns a true value.
sub vmconfig_update_cloudinit_drive {
    my ($storecfg, $conf, $vmid) = @_;

    my ($cloudinit_ds, $cloudinit_drive);

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;
        return if !PVE::QemuServer::drive_is_cloudinit($drive);
        ($cloudinit_ds, $cloudinit_drive) = ($ds, $drive);
    });

    return if !$cloudinit_drive;

    PVE::QemuConfig->write_config($vmid, $conf)
        if PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid);

    if (PVE::QemuServer::check_running($vmid)) {
        my $path = PVE::Storage::path($storecfg, $cloudinit_drive->{file});
        if ($path) {
            # eject first, then insert the regenerated image
            mon_cmd($vmid, "eject", force => JSON::true, id => "$cloudinit_ds");
            mon_cmd($vmid, "blockdev-change-medium", id => "$cloudinit_ds", filename => "$path");
        }
    }
}
5506
# called in locked context by incoming migration
#
# Collects the volumes of $conf that live on non-shared storages.
#
# CD-ROM drives, 'tpmstate0', drives without a volume and volumes on shared
# storages are left out.
#
# Returns a hash ref mapping the drive key to
#   [$volid, $storeid, $volname, $drive, $use_existing]
# where $use_existing is 1 if the volid is listed in $replicated_volumes.
sub vm_migrate_get_nbd_disks {
    my ($storecfg, $conf, $replicated_volumes) = @_;

    my $local_volumes = {};

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive) || $ds eq 'tpmstate0';

        my $volid = $drive->{file} or return;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid);

        return if PVE::Storage::storage_config($storecfg, $storeid)->{shared};

        # replicated disks re-use existing state via bitmap
        my $use_existing = $replicated_volumes->{$volid} ? 1 : 0;

        $local_volumes->{$ds} = [$volid, $storeid, $volname, $drive, $use_existing];
    });

    return $local_volumes;
}
5533
# called in locked context by incoming migration
#
# Allocates the target volumes for an NBD storage migration.
#
# Parameters:
#   $source_volumes - hash ref, drive key => array ref
#                     [$volid, $storeid, $volname, $drive, $use_existing, $format]
#                     ($format is optional)
#   $storagemap     - parsed storage map; with {identity} set, the source
#                     storage and the format of the source volume are used
#
# Volumes flagged $use_existing are not allocated again; they are returned with
# their current volid and 'replicated' set.
#
# Returns a hash ref, drive key => { drivestr, volid[, replicated] }.
# The drive hashes inside $source_volumes are not modified.
sub vm_migrate_alloc_nbd_disks {
    my ($storecfg, $vmid, $source_volumes, $storagemap) = @_;

    my $nbd = {};
    foreach my $opt (sort keys %$source_volumes) {
        my ($volid, $storeid, $volname, $drive, $use_existing, $format) = @{$source_volumes->{$opt}};

        if ($use_existing) {
            $nbd->{$opt}->{drivestr} = print_drive($drive);
            $nbd->{$opt}->{volid} = $volid;
            $nbd->{$opt}->{replicated} = 1;
            next;
        }

        # storage mapping + volname = regular migration
        # storage mapping + format = remote migration
        # order of precedence, filtered by whether storage supports it:
        # 1. explicit requested format
        # 2. format of current volume
        # 3. default format of storage
        if (!$storagemap->{identity}) {
            $storeid = PVE::JSONSchema::map_id($storagemap, $storeid);
            my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
            if (!$format || !grep { $format eq $_ } @$validFormats) {
                # Forget an empty or unsupported request. Otherwise it would stay defined and
                # the '//=' fallback to the storage default below could never take effect.
                $format = undef;

                if ($volname) {
                    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
                    my $fileFormat = qemu_img_format($scfg, $volname);
                    $format = $fileFormat
                        if grep { $fileFormat eq $_ } @$validFormats;
                }
                $format //= $defFormat;
            }
        } else {
            # can't happen for remote migration, so $volname is always defined
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $format = qemu_img_format($scfg, $volname);
        }

        # NOTE(review): presumably converts the drive size from bytes to the KiB expected by
        # vdisk_alloc - confirm against PVE::Storage
        my $size = $drive->{size} / 1024;
        my $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $format, undef, $size);

        # work on a shallow copy, so the caller's drive hash keeps pointing at the source volume
        my $newdrive = { %$drive, format => $format, file => $newvolid };

        $nbd->{$opt}->{drivestr} = print_drive($newdrive);
        $nbd->{$opt}->{volid} = $newvolid;
    }

    return $nbd;
}
5585
# see vm_start_nolock for parameters, additionally:
# migrate_opts:
#   storagemap = parsed storage map for allocating NBD disks
#
# Starts the VM while holding the config lock.
#
# Refuses templates (unless $params->{skiptemplate}) and VMs that are already
# running. If the config carries a 'backup' lock and the VM is running, the VM
# is resumed instead of started. $params->{resume} is set here when the config
# has a 'suspended' lock or a vmstate entry.
#
# With a storagemap, the local disks are allocated on the target and the
# resulting drive strings replace the ones in the loaded config before start.
sub vm_start {
    my ($storecfg, $vmid, $params, $migrate_opts) = @_;

    my $start_locked = sub {
        my $migratedfrom = $migrate_opts->{migratedfrom};
        my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);

        die "you can't start a vm if it's a template\n"
            if !$params->{skiptemplate} && PVE::QemuConfig->is_template($conf);

        my $suspended = PVE::QemuConfig->has_lock($conf, 'suspended');
        my $in_backup = PVE::QemuConfig->has_lock($conf, 'backup');

        my $running = check_running($vmid, undef, $migratedfrom);

        # a backup is currently running, attempt to start the guest in the
        # existing QEMU instance
        return vm_resume($vmid) if $in_backup && $running;

        PVE::QemuConfig->check_lock($conf)
            if !$params->{skiplock} && !$suspended;

        $params->{resume} = $suspended || defined($conf->{vmstate});

        die "VM $vmid already running\n" if $running;

        if (my $storagemap = $migrate_opts->{storagemap}) {
            my $disks = vm_migrate_get_nbd_disks(
                $storecfg, $conf, $migrate_opts->{replicated_volumes});
            my $nbd = $migrate_opts->{nbd} =
                vm_migrate_alloc_nbd_disks($storecfg, $vmid, $disks, $storagemap);

            # let the config point at the freshly allocated target volumes
            $conf->{$_} = $nbd->{$_}->{drivestr} for keys %$nbd;
        }

        return vm_start_nolock($storecfg, $vmid, $conf, $params, $migrate_opts);
    };

    return PVE::QemuConfig->lock_config($vmid, $start_locked);
}
5629
5630
5631 # params:
5632 # statefile => 'tcp', 'unix' for migration or path/volid for RAM state
5633 # skiplock => 0/1, skip checking for config lock
5634 # skiptemplate => 0/1, skip checking whether VM is template
5635 # forcemachine => to force QEMU machine (rollback/migration)
5636 # forcecpu => a QEMU '-cpu' argument string to override get_cpu_options
5637 # timeout => in seconds
5638 # paused => start VM in paused state (backup)
5639 # resume => resume from hibernation
5640 # pbs-backing => {
5641 # sata0 => {
5642 # repository
5643 # snapshot
5644 # keyfile
5645 # archive
5646 # },
5647 # virtio2 => ...
5648 # }
5649 # migrate_opts:
5650 # nbd => volumes for NBD exports (vm_migrate_alloc_nbd_disks)
5651 # migratedfrom => source node
5652 # spice_ticket => used for spice migration, passed via tunnel/stdin
5653 # network => CIDR of migration network
5654 # type => secure/insecure - tunnel over encrypted connection or plain-text
5655 # nbd_proto_version => int, 0 for TCP, 1 for UNIX
5656 # replicated_volumes => which volids should be re-used with bitmaps for nbd migration
5657 # offline_volumes => new volids of offline migrated disks like tpmstate and cloudinit, not yet
5658 # contained in config
5659 sub vm_start_nolock {
5660 my ($storecfg, $vmid, $conf, $params, $migrate_opts) = @_;
5661
5662 my $statefile = $params->{statefile};
5663 my $resume = $params->{resume};
5664
5665 my $migratedfrom = $migrate_opts->{migratedfrom};
5666 my $migration_type = $migrate_opts->{type};
5667
5668 my $res = {};
5669
5670 # clean up leftover reboot request files
5671 eval { clear_reboot_request($vmid); };
5672 warn $@ if $@;
5673
5674 if (!$statefile && scalar(keys %{$conf->{pending}})) {
5675 vmconfig_apply_pending($vmid, $conf, $storecfg);
5676 $conf = PVE::QemuConfig->load_config($vmid); # update/reload
5677 }
5678
5679 # don't regenerate the ISO if the VM is started as part of a live migration
5680 # this way we can reuse the old ISO with the correct config
5681 if (!$migratedfrom) {
5682 if (PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid)) {
5683 # FIXME: apply_cloudinit_config updates $conf in this case, and it would only drop
5684 # $conf->{cloudinit}, so we could just not do this?
5685 # But we do it above, so for now let's be consistent.
5686 $conf = PVE::QemuConfig->load_config($vmid); # update/reload
5687 }
5688 }
5689
5690 # override offline migrated volumes, conf is out of date still
5691 if (my $offline_volumes = $migrate_opts->{offline_volumes}) {
5692 for my $key (sort keys $offline_volumes->%*) {
5693 my $parsed = parse_drive($key, $conf->{$key});
5694 $parsed->{file} = $offline_volumes->{$key};
5695 $conf->{$key} = print_drive($parsed);
5696 }
5697 }
5698
5699 my $defaults = load_defaults();
5700
5701 # set environment variable useful inside network script
5702 # for remote migration the config is available on the target node!
5703 if (!$migrate_opts->{remote_node}) {
5704 $ENV{PVE_MIGRATED_FROM} = $migratedfrom;
5705 }
5706
5707 PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1);
5708
5709 my $forcemachine = $params->{forcemachine};
5710 my $forcecpu = $params->{forcecpu};
5711 if ($resume) {
5712 # enforce machine and CPU type on suspended vm to ensure HW compatibility
5713 $forcemachine = $conf->{runningmachine};
5714 $forcecpu = $conf->{runningcpu};
5715 print "Resuming suspended VM\n";
5716 }
5717
5718 my ($cmd, $vollist, $spice_port) = config_to_command($storecfg, $vmid,
5719 $conf, $defaults, $forcemachine, $forcecpu, $params->{'pbs-backing'});
5720
5721 my $migration_ip;
5722 my $get_migration_ip = sub {
5723 my ($nodename) = @_;
5724
5725 return $migration_ip if defined($migration_ip);
5726
5727 my $cidr = $migrate_opts->{network};
5728
5729 if (!defined($cidr)) {
5730 my $dc_conf = PVE::Cluster::cfs_read_file('datacenter.cfg');
5731 $cidr = $dc_conf->{migration}->{network};
5732 }
5733
5734 if (defined($cidr)) {
5735 my $ips = PVE::Network::get_local_ip_from_cidr($cidr);
5736
5737 die "could not get IP: no address configured on local " .
5738 "node for network '$cidr'\n" if scalar(@$ips) == 0;
5739
5740 die "could not get IP: multiple addresses configured on local " .
5741 "node for network '$cidr'\n" if scalar(@$ips) > 1;
5742
5743 $migration_ip = @$ips[0];
5744 }
5745
5746 $migration_ip = PVE::Cluster::remote_node_ip($nodename, 1)
5747 if !defined($migration_ip);
5748
5749 return $migration_ip;
5750 };
5751
5752 if ($statefile) {
5753 if ($statefile eq 'tcp') {
5754 my $migrate = $res->{migrate} = { proto => 'tcp' };
5755 $migrate->{addr} = "localhost";
5756 my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg');
5757 my $nodename = nodename();
5758
5759 if (!defined($migration_type)) {
5760 if (defined($datacenterconf->{migration}->{type})) {
5761 $migration_type = $datacenterconf->{migration}->{type};
5762 } else {
5763 $migration_type = 'secure';
5764 }
5765 }
5766
5767 if ($migration_type eq 'insecure') {
5768 $migrate->{addr} = $get_migration_ip->($nodename);
5769 $migrate->{addr} = "[$migrate->{addr}]" if Net::IP::ip_is_ipv6($migrate->{addr});
5770 }
5771
5772 my $pfamily = PVE::Tools::get_host_address_family($nodename);
5773 $migrate->{port} = PVE::Tools::next_migrate_port($pfamily);
5774 $migrate->{uri} = "tcp:$migrate->{addr}:$migrate->{port}";
5775 push @$cmd, '-incoming', $migrate->{uri};
5776 push @$cmd, '-S';
5777
5778 } elsif ($statefile eq 'unix') {
5779 # should be default for secure migrations as a ssh TCP forward
5780 # tunnel is not deterministic reliable ready and fails regurarly
5781 # to set up in time, so use UNIX socket forwards
5782 my $migrate = $res->{migrate} = { proto => 'unix' };
5783 $migrate->{addr} = "/run/qemu-server/$vmid.migrate";
5784 unlink $migrate->{addr};
5785
5786 $migrate->{uri} = "unix:$migrate->{addr}";
5787 push @$cmd, '-incoming', $migrate->{uri};
5788 push @$cmd, '-S';
5789
5790 } elsif (-e $statefile) {
5791 push @$cmd, '-loadstate', $statefile;
5792 } else {
5793 my $statepath = PVE::Storage::path($storecfg, $statefile);
5794 push @$vollist, $statefile;
5795 push @$cmd, '-loadstate', $statepath;
5796 }
5797 } elsif ($params->{paused}) {
5798 push @$cmd, '-S';
5799 }
5800
5801 my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume);
5802
5803 my $pci_devices = {}; # host pci devices
5804 for (my $i = 0; $i < $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES; $i++) {
5805 my $dev = $conf->{"hostpci$i"} or next;
5806 $pci_devices->{$i} = parse_hostpci($dev);
5807 }
5808
5809 # do not reserve pciid for mediated devices, sysfs will error out for duplicate assignment
5810 my $real_pci_devices = [ grep { !(defined($_->{mdev}) && scalar($_->{pciid}->@*) == 1) } values $pci_devices->%* ];
5811
5812 # map to a flat list of pci ids
5813 my $pci_id_list = [ map { $_->{id} } map { $_->{pciid}->@* } $real_pci_devices->@* ];
5814
5815 # reserve all PCI IDs before actually doing anything with them
5816 PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, $start_timeout);
5817
5818 eval {
5819 my $uuid;
5820 for my $id (sort keys %$pci_devices) {
5821 my $d = $pci_devices->{$id};
5822 for my $dev ($d->{pciid}->@*) {
5823 my $info = PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $id, $d->{mdev});
5824
5825 # nvidia grid needs the uuid of the mdev as qemu parameter
5826 if ($d->{mdev} && !defined($uuid) && $info->{vendor} eq '10de') {
5827 $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $id);
5828 }
5829 }
5830 }
5831 push @$cmd, '-uuid', $uuid if defined($uuid);
5832 };
5833 if (my $err = $@) {
5834 eval { cleanup_pci_devices($vmid, $conf) };
5835 warn $@ if $@;
5836 die $err;
5837 }
5838
5839 PVE::Storage::activate_volumes($storecfg, $vollist);
5840
5841 eval {
5842 run_command(['/bin/systemctl', 'stop', "$vmid.scope"], outfunc => sub{}, errfunc => sub{});
5843 };
5844 # Issues with the above 'stop' not being fully completed are extremely rare, a very low
5845 # timeout should be more than enough here...
5846 PVE::Systemd::wait_for_unit_removed("$vmid.scope", 20);
5847
5848 my $cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{cpuunits});
5849
5850 my %run_params = (
5851 timeout => $statefile ? undef : $start_timeout,
5852 umask => 0077,
5853 noerr => 1,
5854 );
5855
5856 # when migrating, prefix QEMU output so other side can pick up any
5857 # errors that might occur and show the user
5858 if ($migratedfrom) {
5859 $run_params{quiet} = 1;
5860 $run_params{logfunc} = sub { print "QEMU: $_[0]\n" };
5861 }
5862
5863 my %systemd_properties = (
5864 Slice => 'qemu.slice',
5865 KillMode => 'process',
5866 SendSIGKILL => 0,
5867 TimeoutStopUSec => ULONG_MAX, # infinity
5868 );
5869
5870 if (PVE::CGroup::cgroup_mode() == 2) {
5871 $systemd_properties{CPUWeight} = $cpuunits;
5872 } else {
5873 $systemd_properties{CPUShares} = $cpuunits;
5874 }
5875
5876 if (my $cpulimit = $conf->{cpulimit}) {
5877 $systemd_properties{CPUQuota} = int($cpulimit * 100);
5878 }
5879 $systemd_properties{timeout} = 10 if $statefile; # setting up the scope shoul be quick
5880
5881 my $run_qemu = sub {
5882 PVE::Tools::run_fork sub {
5883 PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);
5884
5885 my $tpmpid;
5886 if (my $tpm = $conf->{tpmstate0}) {
5887 # start the TPM emulator so QEMU can connect on start
5888 $tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom);
5889 }
5890
5891 my $exitcode = run_command($cmd, %run_params);
5892 if ($exitcode) {
5893 if ($tpmpid) {
5894 warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n";
5895 kill 'TERM', $tpmpid;
5896 }
5897 die "QEMU exited with code $exitcode\n";
5898 }
5899 };
5900 };
5901
5902 if ($conf->{hugepages}) {
5903
5904 my $code = sub {
5905 my $hugepages_topology = PVE::QemuServer::Memory::hugepages_topology($conf);
5906 my $hugepages_host_topology = PVE::QemuServer::Memory::hugepages_host_topology();
5907
5908 PVE::QemuServer::Memory::hugepages_mount();
5909 PVE::QemuServer::Memory::hugepages_allocate($hugepages_topology, $hugepages_host_topology);
5910
5911 eval { $run_qemu->() };
5912 if (my $err = $@) {
5913 PVE::QemuServer::Memory::hugepages_reset($hugepages_host_topology)
5914 if !$conf->{keephugepages};
5915 die $err;
5916 }
5917
5918 PVE::QemuServer::Memory::hugepages_pre_deallocate($hugepages_topology)
5919 if !$conf->{keephugepages};
5920 };
5921 eval { PVE::QemuServer::Memory::hugepages_update_locked($code); };
5922
5923 } else {
5924 eval { $run_qemu->() };
5925 }
5926
5927 if (my $err = $@) {
5928 # deactivate volumes if start fails
5929 eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
5930 warn $@ if $@;
5931 eval { cleanup_pci_devices($vmid, $conf) };
5932 warn $@ if $@;
5933
5934 die "start failed: $err";
5935 }
5936
5937 # re-reserve all PCI IDs now that we can know the actual VM PID
5938 my $pid = PVE::QemuServer::Helpers::vm_running_locally($vmid);
5939 eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, undef, $pid) };
5940 warn $@ if $@;
5941
5942 if (defined($res->{migrate})) {
5943 print "migration listens on $res->{migrate}->{uri}\n";
5944 } elsif ($statefile) {
5945 eval { mon_cmd($vmid, "cont"); };
5946 warn $@ if $@;
5947 }
5948
5949 #start nbd server for storage migration
5950 if (my $nbd = $migrate_opts->{nbd}) {
5951 my $nbd_protocol_version = $migrate_opts->{nbd_proto_version} // 0;
5952
5953 my $migrate_storage_uri;
5954 # nbd_protocol_version > 0 for unix socket support
5955 if ($nbd_protocol_version > 0 && ($migration_type eq 'secure' || $migration_type eq 'websocket')) {
5956 my $socket_path = "/run/qemu-server/$vmid\_nbd.migrate";
5957 mon_cmd($vmid, "nbd-server-start", addr => { type => 'unix', data => { path => $socket_path } } );
5958 $migrate_storage_uri = "nbd:unix:$socket_path";
5959 $res->{migrate}->{unix_sockets} = [$socket_path];
5960 } else {
5961 my $nodename = nodename();
5962 my $localip = $get_migration_ip->($nodename);
5963 my $pfamily = PVE::Tools::get_host_address_family($nodename);
5964 my $storage_migrate_port = PVE::Tools::next_migrate_port($pfamily);
5965
5966 mon_cmd($vmid, "nbd-server-start", addr => {
5967 type => 'inet',
5968 data => {
5969 host => "${localip}",
5970 port => "${storage_migrate_port}",
5971 },
5972 });
5973 $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
5974 $migrate_storage_uri = "nbd:${localip}:${storage_migrate_port}";
5975 }
5976
5977 my $block_info = mon_cmd($vmid, "query-block");
5978 $block_info = { map { $_->{device} => $_ } $block_info->@* };
5979
5980 foreach my $opt (sort keys %$nbd) {
5981 my $drivestr = $nbd->{$opt}->{drivestr};
5982 my $volid = $nbd->{$opt}->{volid};
5983
5984 my $block_node = $block_info->{"drive-$opt"}->{inserted}->{'node-name'};
5985
5986 mon_cmd(
5987 $vmid,
5988 "block-export-add",
5989 id => "drive-$opt",
5990 'node-name' => $block_node,
5991 writable => JSON::true,
5992 type => "nbd",
5993 name => "drive-$opt", # NBD export name
5994 );
5995
5996 my $nbd_uri = "$migrate_storage_uri:exportname=drive-$opt";
5997 print "storage migration listens on $nbd_uri volume:$drivestr\n";
5998 print "re-using replicated volume: $opt - $volid\n"
5999 if $nbd->{$opt}->{replicated};
6000
6001 $res->{drives}->{$opt} = $nbd->{$opt};
6002 $res->{drives}->{$opt}->{nbd_uri} = $nbd_uri;
6003 }
6004 }
6005
6006 if ($migratedfrom) {
6007 eval {
6008 set_migration_caps($vmid);
6009 };
6010 warn $@ if $@;
6011
6012 if ($spice_port) {
6013 print "spice listens on port $spice_port\n";
6014 $res->{spice_port} = $spice_port;
6015 if ($migrate_opts->{spice_ticket}) {
6016 mon_cmd($vmid, "set_password", protocol => 'spice', password =>
6017 $migrate_opts->{spice_ticket});
6018 mon_cmd($vmid, "expire_password", protocol => 'spice', time => "+30");
6019 }
6020 }
6021
6022 } else {
6023 mon_cmd($vmid, "balloon", value => $conf->{balloon}*1024*1024)
6024 if !$statefile && $conf->{balloon};
6025
6026 foreach my $opt (keys %$conf) {
6027 next if $opt !~ m/^net\d+$/;
6028 my $nicconf = parse_net($conf->{$opt});
6029 qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down};
6030 }
6031 add_nets_bridge_fdb($conf, $vmid);
6032 }
6033
6034 mon_cmd($vmid, 'qom-set',
6035 path => "machine/peripheral/balloon0",
6036 property => "guest-stats-polling-interval",
6037 value => 2) if (!defined($conf->{balloon}) || $conf->{balloon});
6038
6039 if ($resume) {
6040 print "Resumed VM, removing state\n";
6041 if (my $vmstate = $conf->{vmstate}) {
6042 PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
6043 PVE::Storage::vdisk_free($storecfg, $vmstate);
6044 }
6045 delete $conf->@{qw(lock vmstate runningmachine runningcpu)};
6046 PVE::QemuConfig->write_config($vmid, $conf);
6047 }
6048
6049 PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-start');
6050
6051 return $res;
6052 }
6053
# Build the QEMU command line for a VM, or for one of its snapshots, and return it as one string.
#
# $storecfg: storage configuration, handed on to config_to_command()
# $vmid:     ID of the VM whose configuration gets loaded
# $snapname: optional snapshot name; if set, the snapshot section is used as configuration
#
# Dies if $snapname is set but the configuration has no snapshot of that name.
sub vm_commandline {
    my ($storecfg, $vmid, $snapname) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    my $forcemachine;
    my $forcecpu;
    if ($snapname) {
        my $snap_conf = $conf->{snapshots}->{$snapname};
        die "snapshot '$snapname' does not exist\n" if !defined($snap_conf);

        # check for machine or CPU overrides in snapshot
        ($forcemachine, $forcecpu) = $snap_conf->@{qw(runningmachine runningcpu)};

        # keep file digest for API
        $snap_conf->{digest} = $conf->{digest};

        $conf = $snap_conf;
    }

    my $defaults = load_defaults();
    my $cmd = config_to_command($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu);

    return PVE::Tools::cmd2string($cmd);
}
6079
# Send the QMP 'system_reset' command to a VM while holding its config lock.
#
# $vmid:     ID of the VM
# $skiplock: if true, the lock check on the loaded configuration is skipped
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $reset_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid);

        PVE::QemuConfig->check_lock($conf) unless $skiplock;

        mon_cmd($vmid, "system_reset");
    };

    PVE::QemuConfig->lock_config($vmid, $reset_locked);
}
6092
# Collect the volume IDs referenced by a VM configuration.
#
# Entries starting with '/' (absolute paths) and entries for which
# PVE::Storage::parse_volume_id() yields no storage ID are left out.
#
# Returns an array reference of volume IDs.
sub get_vm_volumes {
    my ($conf) = @_;

    my @volids;
    foreach_volid($conf, sub {
        my ($volid, $attr) = @_;

        # absolute paths are not storage-managed volumes
        return if $volid =~ m|^/|;

        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        push @volids, $volid;
    });

    return \@volids;
}
6110
# Remove the mediated devices of all 'hostpciN' entries that use one and drop the PCI
# reservation of the VM.
#
# $vmid: ID of the VM
# $conf: VM configuration (hash reference)
sub cleanup_pci_devices {
    my ($vmid, $conf) = @_;

    for my $opt (keys $conf->%*) {
        my ($hostpci_index) = $opt =~ m/^hostpci(\d+)$/ or next;

        my $mdev_uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $hostpci_index);
        my $hostpci = parse_hostpci($conf->{$opt});
        next if !$hostpci->{mdev};

        # NOTE: avoid PVE::SysFSTools::pci_cleanup_mdev_device as it requires PCI ID and we
        # don't want to break ABI just for this two liner
        my $mdev_sysfs_dir = "/sys/bus/mdev/devices/$mdev_uuid";
        PVE::SysFSTools::file_write("$mdev_sysfs_dir/remove", "1") if -e $mdev_sysfs_dir;
    }

    PVE::QemuServer::PCI::remove_pci_reservation($vmid);
}
6128
# Clean up after a VM has been stopped. Any error raised on the way is only warned about.
#
# $storecfg:              storage configuration
# $vmid:                  ID of the VM
# $conf:                  VM configuration
# $keepActive:            if true, volumes are not deactivated and the TPM state is not unmapped
# $apply_pending_changes: if true, vmconfig_apply_pending() is called at the end
sub vm_stop_cleanup {
    my ($storecfg, $vmid, $conf, $keepActive, $apply_pending_changes) = @_;

    eval {
        unless ($keepActive) {
            PVE::Storage::deactivate_volumes($storecfg, get_vm_volumes($conf));

            if (my $tpmdrive = $conf->{tpmstate0}) {
                my $tpm = parse_drive("tpmstate0", $tpmdrive);
                my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
                PVE::Storage::unmap_volume($storecfg, $tpm->{file}) if $storeid;
            }
        }

        # remove the per-VM runtime files
        unlink "/var/run/qemu-server/${vmid}.$_" for qw(mon qmp pid vnc qga);

        if ($conf->{ivshmem}) {
            my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
            # Just delete it for now: VMs which already have it open are not affected, but new
            # VMs will get a separate one. If this becomes an issue we either add some sort of
            # ref-counting or just add a "don't delete on stop" flag to the ivshmem format.
            my $shm_name = $ivshmem->{name} // $vmid;
            unlink '/dev/shm/pve-shm-' . $shm_name;
        }

        cleanup_pci_devices($vmid, $conf);

        vmconfig_apply_pending($vmid, $conf, $storecfg) if $apply_pending_changes;
    };
    warn $@ if $@; # avoid errors - just warn
}
6166
# call only in locked context
#
# Stop a running VM. Depending on $shutdown this sends 'quit', 'guest-shutdown' (guest agent
# enabled in the config) or 'system_powerdown', then waits for the process to go away.
#
# $timeout: seconds to wait after the command; defaults to the 'down' value of the startup
#           option (shutdown only, if set) and otherwise to 60
# $force:   if true, SIGTERM (and after 10 more seconds SIGKILL) is sent instead of dying when
#           the command fails or the timeout is reached
# $nocheck: if true, the configuration is not loaded, so no lock check, hookscript or cleanup
#
# Returns without doing anything if the VM is not running.
sub _do_vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive) = @_;

    my $pid = check_running($vmid, $nocheck);
    return if !$pid;

    my $conf;
    if (!$nocheck) {
        $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->check_lock($conf) if !$skiplock;
        if (!defined($timeout) && $shutdown && $conf->{startup}) {
            my $startup_opts = PVE::JSONSchema::pve_parse_startup_order($conf->{startup});
            $timeout = $startup_opts->{down} if $startup_opts->{down};
        }
        PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-stop');
    }

    # polls once per second; true if the VM was still running when $limit seconds were used up
    my $wait_timed_out = sub {
        my ($limit) = @_;

        my $waited = 0;
        while (($waited < $limit) && check_running($vmid, $nocheck)) {
            $waited++;
            sleep 1;
        }
        return $waited >= $limit;
    };

    eval {
        if (!$shutdown) {
            mon_cmd($vmid, "quit");
        } elsif (defined($conf) && get_qga_key($conf, 'enabled')) {
            mon_cmd($vmid, "guest-shutdown", timeout => $timeout);
        } else {
            mon_cmd($vmid, "system_powerdown");
        }
    };
    my $err = $@;

    if ($err) {
        if (!check_running($vmid, $nocheck)) {
            warn "Unexpected: VM shutdown command failed, but VM not running anymore..\n";
            return;
        }
        die "VM quit/powerdown failed\n" if !$force;

        warn "VM quit/powerdown failed - terminating now with SIGTERM\n";
        kill 15, $pid;
    } else {
        $timeout //= 60;

        if (!$wait_timed_out->($timeout)) {
            vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
            return;
        }
        die "VM quit/powerdown failed - got timeout\n" if !$force;

        warn "VM still running - terminating now with SIGTERM\n";
        kill 15, $pid;
    }

    # wait again, this time for the SIGTERM to take effect
    if ($wait_timed_out->(10)) {
        warn "VM still running - terminating now with SIGKILL\n";
        kill 9, $pid;
        sleep 1;
    }

    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
}
6248
# Note: use $nocheck to skip tests if VM configuration file exists.
# We need that when migrating VMs to other nodes (files already moved)
# Note: we set $keepActive in vzdump stop mode - volumes need to stay active
#
# With $migratedfrom set, the VM process gets a SIGTERM and is cleaned up using the config
# loaded for that node, without taking the config lock. Otherwise _do_vm_stop() runs under the
# config lock.
sub vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive, $migratedfrom) = @_;

    # a plain stop is forced unless the caller said otherwise
    $force //= 1 unless $shutdown;

    if ($migratedfrom) {
        my $running_pid = check_running($vmid, $nocheck, $migratedfrom);
        kill 15, $running_pid if $running_pid;

        my $source_conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);
        vm_stop_cleanup($storecfg, $vmid, $source_conf, $keepActive, 0);
        return;
    }

    my $stop_locked = sub {
        _do_vm_stop($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive);
    };
    PVE::QemuConfig->lock_config($vmid, $stop_locked);
}
6269
# Reboot a running VM: create a reboot request and shut the VM down under the config lock.
#
# $vmid:    ID of the VM
# $timeout: shutdown timeout, handed on to _do_vm_stop()
#
# On error the reboot request is cleared again and the error is re-thrown.
sub vm_reboot {
    my ($vmid, $timeout) = @_;

    my $reboot_locked = sub {
        eval {
            # only reboot if running, as qmeventd starts it again on a stop event
            return if !check_running($vmid);

            create_reboot_request($vmid);

            my $storecfg = PVE::Storage::config();
            _do_vm_stop($storecfg, $vmid, undef, undef, $timeout, 1);
        };
        if (my $reboot_err = $@) {
            # avoid that the next normal shutdown will be confused for a reboot
            clear_reboot_request($vmid);
            die $reboot_err;
        }
    };

    PVE::QemuConfig->lock_config($vmid, $reboot_locked);
}
6292
# note: if using the statestorage parameter, the caller has to check privileges
#
# Suspend a VM.
#
# Without $includestate only the QMP 'stop' command is sent. With $includestate the config lock
# is set to 'suspending', a state volume is allocated, the state is written with 'savevm-start'
# and, once that completed, the VM is quit and the lock becomes 'suspended'.
#
# $vmid:         ID of the VM
# $skiplock:     if true, the config lock check is skipped
# $includestate: if true, save the VM state to a volume
# $statestorage: optional storage for the state volume; if unset, find_vmstate_storage() picks
#                one and the permission on it is checked for non-CLI environments
sub vm_suspend {
    my ($vmid, $skiplock, $includestate, $statestorage) = @_;

    # filled in the first locked section, used again further down
    my ($conf, $path, $storecfg, $vmstate);

    PVE::QemuConfig->lock_config($vmid, sub {
        $conf = PVE::QemuConfig->load_config($vmid);

        my $is_backing_up = PVE::QemuConfig->has_lock($conf, 'backup');
        PVE::QemuConfig->check_lock($conf) unless $skiplock || $is_backing_up;

        die "cannot suspend to disk during backup\n" if $is_backing_up && $includestate;

        if (!$includestate) {
            mon_cmd($vmid, "stop");
        } else {
            $conf->{lock} = 'suspending';
            my $date = strftime("%Y-%m-%d", localtime(time()));
            $storecfg = PVE::Storage::config();
            if (!$statestorage) {
                $statestorage = find_vmstate_storage($conf, $storecfg);
                # check permissions for the storage
                my $rpcenv = PVE::RPCEnvironment::get();
                if ($rpcenv->{type} ne 'cli') {
                    my $authuser = $rpcenv->get_user();
                    $rpcenv->check($authuser, "/storage/$statestorage", ['Datastore.AllocateSpace']);
                }
            }

            $vmstate = PVE::QemuConfig->__snapshot_save_vmstate(
                $vmid, $conf, "suspend-$date", $storecfg, $statestorage, 1);
            $path = PVE::Storage::path($storecfg, $vmstate);
            PVE::QemuConfig->write_config($vmid, $conf);
        }
    });

    if ($includestate) {
        # save vm state
        PVE::Storage::activate_volumes($storecfg, [$vmstate]);

        eval {
            set_migration_caps($vmid, 1);
            mon_cmd($vmid, "savevm-start", statefile => $path);

            # poll until the state was written completely or saving failed
            while (1) {
                my $state = mon_cmd($vmid, "query-savevm");
                my $status = $state->{status};

                die "savevm not active\n" if !$status;

                if ($status eq 'active') {
                    sleep(1);
                    next;
                }
                if ($status eq 'completed') {
                    print "State saved, quitting\n";
                    last;
                }

                die "query-savevm failed with error '$state->{error}'\n"
                    if $status eq 'failed' && $state->{error};
                die "query-savevm returned status '$state->{status}'\n";
            }
        };
        my $save_err = $@;

        PVE::QemuConfig->lock_config($vmid, sub {
            $conf = PVE::QemuConfig->load_config($vmid);
            if ($save_err) {
                # cleanup, but leave suspending lock, to indicate something went wrong
                eval {
                    mon_cmd($vmid, "savevm-end");
                    PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
                    PVE::Storage::vdisk_free($storecfg, $vmstate);
                    delete $conf->@{qw(vmstate runningmachine runningcpu)};
                    PVE::QemuConfig->write_config($vmid, $conf);
                };
                warn $@ if $@;
                die $save_err;
            }

            die "lock changed unexpectedly\n"
                if !PVE::QemuConfig->has_lock($conf, 'suspending');

            mon_cmd($vmid, "quit");
            $conf->{lock} = 'suspended';
            PVE::QemuConfig->write_config($vmid, $conf);
        });
    }
}
6387
# $nocheck is set when called as part of a migration - in this context the
# location of the config file (source or target node) is not deterministic,
# since migration cannot wait for pmxcfs to process the rename
#
# Resume a VM under its config lock. Nothing is done if 'query-status' reports 'running'.
# A 'suspended' VM is woken with 'system_wakeup', any other with 'cont'; a VM in state
# 'shutdown' is reset first.
sub vm_resume {
    my ($vmid, $skiplock, $nocheck) = @_;

    PVE::QemuConfig->lock_config($vmid, sub {
        my $res = mon_cmd($vmid, 'query-status');

        my $conf;
        if (!$nocheck) {
            $conf = PVE::QemuConfig->load_config($vmid);
        } else {
            $conf = eval { PVE::QemuConfig->load_config($vmid) }; # try on target node
            if ($@) {
                my $vmlist = PVE::Cluster::get_vmlist();
                if (exists($vmlist->{ids}->{$vmid})) {
                    my $source_node = $vmlist->{ids}->{$vmid}->{node};
                    $conf = eval { PVE::QemuConfig->load_config($vmid, $source_node) }; # try on source node
                }
                if (!$conf) {
                    PVE::Cluster::cfs_update(); # vmlist was wrong, invalidate cache
                    $conf = PVE::QemuConfig->load_config($vmid); # last try on target node again
                }
            }
        }

        my $status = $res->{status} // '';
        return if $status eq 'running'; # job done, go home

        my $resume_cmd = $status eq 'suspended' ? 'system_wakeup' : 'cont';
        my $needs_reset = $status eq 'shutdown';

        if (!$nocheck) {
            PVE::QemuConfig->check_lock($conf)
                unless $skiplock || PVE::QemuConfig->has_lock($conf, 'backup');
        }

        # required if a VM shuts down during a backup and we get a resume
        # request before the backup finishes for example
        mon_cmd($vmid, "system_reset") if $needs_reset;

        add_nets_bridge_fdb($conf, $vmid) if $resume_cmd eq 'cont';

        mon_cmd($vmid, $resume_cmd);
    });
}
6438
# Send a key (combination) to a VM through the human monitor 'sendkey' command, holding the
# config lock meanwhile.
#
# $vmid:     ID of the VM
# $skiplock: accepted, but not used by this function
# $key:      key specification, appended verbatim to the 'sendkey' command
#
# Dies with the monitor output if that output is not empty.
sub vm_sendkey {
    my ($vmid, $skiplock, $key) = @_;

    my $sendkey_locked = sub {
        # the loaded config is not used any further here
        my $conf = PVE::QemuConfig->load_config($vmid);

        # there is no qmp command, so we use the human monitor command
        my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "sendkey $key");
        die $output if $output ne '';
    };

    PVE::QemuConfig->lock_config($vmid, $sendkey_locked);
}
6451
6452 # vzdump restore implementation
6453
# Return the name of the first member of a tar archive, as listed by 'tar tf'.
#
# $archive: path of the archive file
#
# Dies if the file does not exist, if 'tar' cannot be started or if the listing is empty.
sub tar_archive_read_firstfile {
    my $archive = shift;

    die "ERROR: file '$archive' does not exist\n" if ! -f $archive;

    # try to detect archive type first
    my $pid = open(my $fh, '-|', 'tar', 'tf', $archive)
        or die "unable to open file '$archive'\n";
    my $firstfile = <$fh>;

    # only the first entry is of interest, so stop tar instead of reading the whole listing
    kill 15, $pid;
    close $fh;

    die "ERROR: archive contains no data\n" if !$firstfile;
    chomp $firstfile;

    return $firstfile;
}
6471
# Remove the temporary volumes recorded in a restore stat file.
#
# Each line is expected to end in 'vzdump:<id>:<volid>'. A volid starting with '/' is unlinked
# as a file, anything else is freed with PVE::Storage::vdisk_free(). Progress and problems are
# reported on STDERR; a failure for one volume does not stop the cleanup of the others.
#
# $storecfg: storage configuration
# $statfile: path of the stat file; nothing is cleaned up if it cannot be opened
sub tar_restore_cleanup {
    my ($storecfg, $statfile) = @_;

    print STDERR "starting cleanup\n";

    if (my $fd = IO::File->new($statfile, "r")) {
        while (defined(my $line = <$fd>)) {
            if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                my $volid = $2;
                eval {
                    if ($volid =~ m|^/|) {
                        # Parenthesised on purpose: 'unlink $volid || die ...' would bind the
                        # '||' to $volid, so a failed unlink would go unnoticed.
                        unlink($volid) or die "unlink failed - $!\n";
                    } else {
                        PVE::Storage::vdisk_free($storecfg, $volid);
                    }
                    print STDERR "temporary volume '$volid' sucessfuly removed\n";
                };
                print STDERR "unable to cleanup '$volid' - $@" if $@;
            } else {
                print STDERR "unable to parse line in statfile - $line";
            }
        }
        $fd->close();
    }
}
6497
# Restore a VM from a backup archive file, picking the restore routine by archive format.
#
# $archive: path of the archive, or '-' which is handed directly to restore_vma_archive()
# $vmid:    ID of the VM to restore
# $user:    user on whose behalf the restore runs
# $opts:    restore options; $opts->{format} overrides the format reported for the archive
sub restore_file_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    return restore_vma_archive($archive, $vmid, $user, $opts) if $archive eq '-';

    my $info = PVE::Storage::archive_info($archive);
    my $format = $opts->{format} // $info->{format};
    my $comp = $info->{compression};

    # try to detect archive format
    return $format eq 'tar'
        ? restore_tar_archive($archive, $vmid, $user, $opts)
        : restore_vma_archive($archive, $vmid, $user, $opts, $comp);
}
6515
# Helper to remove disks that will not be used after restore
#
# $storecfg:     storage configuration
# $vmid:         ID of the VM being restored
# $oldconf:      configuration the VM had before the restore
# $virtdev_hash: devices that get restored, keyed by drive name
#
# Volumes owned by the VM whose drive gets restored are freed. The other owned volumes are
# kept, but lose all snapshots. The state volumes of all snapshots are freed as well.
# Failures are only warned about.
my $restore_cleanup_oldconf = sub {
    my ($storecfg, $vmid, $oldconf, $virtdev_hash) = @_;

    my $kept_disks = {};

    PVE::QemuConfig->foreach_volume($oldconf, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return unless $path && $owner && ($owner == $vmid);

        # Note: only delete disk we want to restore
        # other volumes will become unused
        if (!$virtdev_hash->{$ds}) {
            $kept_disks->{$volid} = 1;
            return;
        }

        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        warn $@ if $@;
    });

    # after the restore we have no snapshots anymore
    for my $snapname (keys $oldconf->{snapshots}->%*) {
        my $snap = $oldconf->{snapshots}->{$snapname};
        if (my $state_volid = $snap->{vmstate}) {
            eval { PVE::Storage::vdisk_free($storecfg, $state_volid); };
            warn $@ if $@;
        }

        for my $volid (keys $kept_disks->%*) {
            eval { PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snapname); };
            warn $@ if $@;
        }
    }
};
6561
# Helper to parse vzdump backup device hints
#
# $rpcenv: Environment, used to check storage permissions
# $user: User ID, to check storage permissions
# $storecfg: Storage configuration
# $fh: the file handle for reading the configuration
# $devinfo: should contain device sizes for all backed-up devices
# $options: backup options (pool, default storage)
#
# Return: $virtdev_hash, updates $devinfo (add devname, virtdev, format, storeid)
my $parse_backup_hints = sub {
    my ($rpcenv, $user, $storecfg, $fh, $devinfo, $options) = @_;

    # assert that an image can be allocated on the storage: content type and permission
    my $check_storage = sub {
        my ($storeid, $scfg) = @_;
        die "Content type 'images' is not available on storage '$storeid'\n"
            if !$scfg->{content}->{images};
        $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace'])
            if $user ne 'root@pam';
    };

    my $virtdev_hash = {};
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\#qmdump\#map:(\S+):(\S+):(\S*):(\S*):$/) {
            my ($virtdev, $devname, $storeid, $format) = ($1, $2, $3, $4);
            die "archive does not contain data for drive '$virtdev'\n"
                if !$devinfo->{$devname};

            # an explicitly requested storage wins, 'local' is the last resort
            $storeid = $options->{storage} if defined($options->{storage});
            $storeid ||= 'local';
            $format ||= 'raw';

            my $dev = $devinfo->{$devname};
            $dev->@{qw(devname virtdev format storeid)} = ($devname, $virtdev, $format, $storeid);

            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $check_storage->($storeid, $scfg); # permission and content type check

            $virtdev_hash->{$virtdev} = $dev;
        } elsif ($line =~ m/^((?:ide|sata|scsi)\d+):\s*(.*)\s*$/) {
            my $virtdev = $1;
            my $drive = parse_drive($virtdev, $2);

            next if !drive_is_cloudinit($drive);

            my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
            $storeid = $options->{storage} if defined ($options->{storage});
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            my $format = qemu_img_format($scfg, $volname); # has 'raw' fallback

            $check_storage->($storeid, $scfg); # permission and content type check

            $virtdev_hash->{$virtdev} = {
                format => $format,
                storeid => $storeid,
                size => PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE,
                is_cloudinit => 1,
            };
        }
    }

    return $virtdev_hash;
};
6629
# Helper to allocate and activate all volumes required for a restore
#
# $storecfg: Storage configuration
# $virtdev_hash: as returned by parse_backup_hints()
# $vmid: ID of the VM the volumes are allocated for
#
# Each entry of $virtdev_hash gets its 'volid' set; its 'format' is replaced by the default
# format of the storage if the requested one is not among the valid formats.
#
# Returns: { $virtdev => $volid }
my $restore_allocate_devices = sub {
    my ($storecfg, $virtdev_hash, $vmid) = @_;

    my $map = {};
    foreach my $virtdev (sort keys %$virtdev_hash) {
        my $d = $virtdev_hash->{$virtdev};
        my $alloc_size = int(($d->{size} + 1024 - 1)/1024); # round up to whole units of 1024
        my $storeid = $d->{storeid};
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        # test if requested format is supported
        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        my $supported = grep { $_ eq $d->{format} } @$validFormats;
        $d->{format} = $defFormat if !$supported;

        # cloudinit volumes get a fixed name, all others are named by the storage layer
        my $name;
        if ($d->{is_cloudinit}) {
            $name = "vm-$vmid-cloudinit";
            # storages with a 'path' setting get the format appended as file extension
            $name .= ".$d->{format}" if $scfg->{path};
        }

        my $volid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $vmid, $d->{format}, $name, $alloc_size);

        print STDERR "new volume ID is '$volid'\n";
        $d->{volid} = $volid;

        PVE::Storage::activate_volumes($storecfg, [$volid]);

        $map->{$virtdev} = $volid;
    }

    return $map;
};
6673
# Rewrite a single line of a backed-up VM configuration for use in the restored configuration.
#
# $cookie: hash reference; $cookie->{netcount} numbers the 'netN' entries generated from old
#          'vlanN' lines and is incremented for each of them
# $map:    { $virtdev => $volid } of the newly allocated volumes
# $line:   the configuration line
# $unique: if true, MAC addresses and the smbios1 UUID are regenerated
#
# Returns the replacement text, which is empty for lines that are dropped.
sub restore_update_config_line {
    my ($cookie, $map, $line, $unique) = @_;

    # these lines never make it into the restored configuration
    for my $dropped_re (
        qr/^\#qmdump\#/,
        qr/^\#vzdump\#/,
        qr/^lock:/,
        qr/^unused\d+:/,
        qr/^parent:/,
    ) {
        return '' if $line =~ $dropped_re;
    }

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
    my $new_macaddr = sub { PVE::Tools::random_ether_addr($dc->{mac_prefix}) };

    if ($line =~ m/^(vlan(\d+)):\s*(\S+)\s*$/) {
        # try to convert old 1.X settings
        my ($ind, $ethcfg) = ($2, $3);

        my $converted = '';
        for my $devconfig (PVE::Tools::split_list($ethcfg)) {
            my ($model, $macaddr) = split(/\=/, $devconfig);
            $macaddr = $new_macaddr->() if !$macaddr || $unique;

            my $netstr = print_net({
                model => $model,
                bridge => "vmbr$ind",
                macaddr => $macaddr,
            });

            $converted .= "net$cookie->{netcount}: $netstr\n";
            $cookie->{netcount}++;
        }
        return $converted;
    }

    if (($line =~ m/^(net\d+):\s*(\S+)\s*$/) && $unique) {
        my ($id, $netstr) = ($1, $2);
        my $net = parse_net($netstr);
        $net->{macaddr} = $new_macaddr->() if $net->{macaddr};
        $netstr = print_net($net);
        return "$id: $netstr\n";
    }

    if ($line =~ m/^((ide|scsi|virtio|sata|efidisk|tpmstate)\d+):\s*(\S+)\s*$/) {
        my ($virtdev, $value) = ($1, $3);
        my $di = parse_drive($virtdev, $value);

        # drives explicitly excluded from the backup are kept, but commented out
        return "#$line" if defined($di->{backup}) && !$di->{backup};
        return "$line" if !$map->{$virtdev};

        delete $di->{format}; # format can change on restore
        $di->{file} = $map->{$virtdev};
        $value = print_drive($di);
        return "$virtdev: $value\n";
    }

    if ($line =~ m/^vmgenid: (.*)/) {
        my $vmgenid = $1;
        # always generate a new vmgenid if there was a valid one setup
        $vmgenid = generate_uuid() if $vmgenid ne '0';
        return "vmgenid: $vmgenid\n";
    }

    if (($line =~ m/^(smbios1: )(.*)/) && $unique) {
        my ($prefix, $value) = ($1, $2);

        my ($uuid, $uuid_str);
        UUID::generate($uuid);
        UUID::unparse($uuid, $uuid_str);

        my $smbios1 = parse_smbios1($value);
        $smbios1->{uuid} = $uuid_str;
        return $prefix . print_smbios1($smbios1) . "\n";
    }

    # everything else is taken over unchanged
    return "$line";
}
6742
# Deactivate all volumes recorded in $virtdev_hash (entries without 'volid' are skipped).
# An error is only printed to STDERR.
my $restore_deactivate_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    my @volids = grep { $_ } map { $_->{volid} } values $virtdev_hash->%*;

    eval { PVE::Storage::deactivate_volumes($storecfg, \@volids); };
    print STDERR $@ if $@;
};
6754
# Free all volumes recorded in $virtdev_hash (entries without 'volid' are skipped). The outcome
# is reported per volume on STDERR; a failure does not stop the remaining volumes from being
# freed.
my $restore_destroy_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    my @volids = grep { $_ } map { $_->{volid} } values $virtdev_hash->%*;

    for my $volid (@volids) {
        eval {
            PVE::Storage::vdisk_free($storecfg, $volid);
            print STDERR "temporary volume '$volid' sucessfuly removed\n";
        };
        print STDERR "unable to cleanup '$volid' - $@" if $@;
    }
};
6767
# Parse the raw configuration from a backup and let the entries of $override_conf replace the
# parsed ones key by key.
#
# Returns the merged configuration.
my $restore_merge_config = sub {
    my ($filename, $backup_conf_raw, $override_conf) = @_;

    my $merged_conf = parse_vm_config($filename, $backup_conf_raw);
    $merged_conf->{$_} = $override_conf->{$_} for keys $override_conf->%*;

    return $merged_conf;
};
6778
# List the 'images' volumes of a VM on all storages.
#
# $cfg:  storage configuration
# $vmid: ID of the VM, handed on to PVE::Storage::vdisk_list()
#
# Returns { $volid => $item }, with the path of the volume added to each item as 'path'.
# Items lacking a volid or a size are left out.
sub scan_volids {
    my ($cfg, $vmid) = @_;

    my $info = PVE::Storage::vdisk_list($cfg, undef, $vmid, undef, 'images');

    my $volid_hash = {};
    for my $storage_items (values %$info) {
        for my $item (@$storage_items) {
            next unless $item->{volid} && $item->{size};

            $item->{path} = PVE::Storage::path($cfg, $item->{volid});
            $volid_hash->{$item->{volid}} = $item;
        }
    }

    return $volid_hash;
}
6795
# Bring the disk entries of a VM configuration in line with the volumes found on the storages.
#
# Runs in three steps: update the size of configured disks, remove 'unusedN' entries whose
# volume is in use, and add volumes that are not referenced at all as unused. Every change is
# printed.
#
# $vmid:       ID of the VM
# $conf:       VM configuration, modified in place
# $volid_hash: { $volid => $info } as built by scan_volids()
#
# Returns a true value if $conf was changed.
sub update_disk_config {
    my ($vmid, $conf, $volid_hash) = @_;

    my $changes;
    my $prefix = "VM $vmid";

    # Note: it is allowed to define multiple storages with same path (alias), so
    # we need to check both 'volid' and real 'path' (two different volid can point
    # to the same path).

    # used and unused disks, by volid and by path
    my $seen_volid = {};
    my $seen_path = {};

    # update size info
    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $volume = $volid_hash->{$volid};

        # mark volid as "in-use" for next step
        $seen_volid->{$volid} = 1;
        if ($volume && (my $path = $volume->{path})) {
            $seen_path->{$path} = 1;
        }

        return if drive_is_cdrom($drive);
        return if !$volume;

        my ($updated, $msg) = PVE::QemuServer::Drive::update_disksize($drive, $volume->{size});
        return if !defined($updated);

        $changes = 1;
        $conf->{$opt} = print_drive($updated);
        print "$prefix ($opt): $msg\n";
    });

    # remove 'unusedX' entry if volume is used
    PVE::QemuConfig->foreach_unused_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $known = $volid_hash->{$volid};
        my $path = $known ? $known->{path} : undef;

        if ($seen_volid->{$volid} || ($path && $seen_path->{$path})) {
            print "$prefix remove entry '$opt', its volume '$volid' is in use\n";
            $changes = 1;
            delete $conf->{$opt};
        }

        $seen_volid->{$volid} = 1;
        $seen_path->{$path} = 1 if $path;
    });

    # whatever is still unreferenced becomes a new unused entry
    for my $volid (sort keys %$volid_hash) {
        next if $volid =~ m/vm-$vmid-state-/;
        next if $seen_volid->{$volid};

        my $path = $volid_hash->{$volid}->{path};
        next if !$path; # just to be sure
        next if $seen_path->{$path};

        $changes = 1;
        my $key = PVE::QemuConfig->add_unused_volume($conf, $volid);
        print "$prefix add unreferenced volume '$volid' as '$key' to config\n";
        $seen_path->{$path} = 1; # avoid to add more than once (aliases)
    }

    return $changes;
}
6869
# Rescan the storages and update the disk entries of one VM or of all VMs.
#
# $vmid:   ID of the VM; if undefined, all VMs returned by config_list() are processed
# $nolock: if true, the configuration is updated without taking the config lock
# $dryrun: if true, changes are not written back to the configuration
sub rescan {
    my ($vmid, $nolock, $dryrun) = @_;

    my $cfg = PVE::Storage::config();

    print "rescan volumes...\n";
    my $volid_hash = scan_volids($cfg, $vmid);

    # updates the configuration of a single VM
    my $update_config = sub {
        my ($id) = @_;

        my $conf = PVE::QemuConfig->load_config($id);

        PVE::QemuConfig->check_lock($conf);

        # only look at the volumes that belong to this VM
        my $vm_volids = {};
        for my $volid (keys %$volid_hash) {
            my $info = $volid_hash->{$volid};
            next unless $info->{vmid} && $info->{vmid} == $id;
            $vm_volids->{$volid} = $info;
        }

        my $changes = update_disk_config($id, $conf, $vm_volids);

        PVE::QemuConfig->write_config($id, $conf) if $changes && !$dryrun;
    };

    my $update_vm = sub {
        my ($id) = @_;

        if ($nolock) {
            $update_config->($id);
        } else {
            PVE::QemuConfig->lock_config($id, $update_config, $id);
        }
    };

    if (defined($vmid)) {
        $update_vm->($vmid);
    } else {
        my $vmlist = config_list();
        $update_vm->($_) for keys %$vmlist;
    }
}
6913
# Restore a VM from a Proxmox Backup Server snapshot.
#
# $archive - volume ID of the backup (must parse as vtype 'backup', format 'pbs-vm')
# $vmid    - ID of the guest to restore into
# $user    - user passed on to the backup-hint parser
# $options - hash ref; keys read here: live, unique, override_conf, pool
#            (the whole hash is also handed to the backup-hint parser)
#
# The config, the optional firewall config and the index are fetched into a
# temporary directory, the disks are allocated and filled with `pbs-restore`,
# and finally the merged config is written. With $options->{live} only
# efidisk0/tpmstate0 are restored up-front and the remaining drives are handed
# to pbs_live_restore(). Dies on error after destroying the allocated volumes.
sub restore_proxmox_backup_archive {
    my ($archive, $vmid, $user, $options) = @_;

    my $storecfg = PVE::Storage::config();

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($archive);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

    my $fingerprint = $scfg->{fingerprint};
    my $keyfile = PVE::Storage::PBSPlugin::pbs_encryption_key_file_name($storecfg, $storeid);

    my $repo = PVE::PBSClient::get_repository($scfg);
    my $namespace = $scfg->{namespace};

    # the namespace arguments are identical for every pbs-restore invocation
    my @ns_arg = defined($namespace) ? ('--ns', $namespace) : ();

    # This is only used for `pbs-restore` and the QEMU PBS driver (live-restore)
    my $password = PVE::Storage::PBSPlugin::pbs_get_password($scfg, $storeid);
    local $ENV{PBS_PASSWORD} = $password;
    local $ENV{PBS_FINGERPRINT} = $fingerprint if defined($fingerprint);

    my ($vtype, $pbs_backup_name, undef, undef, undef, undef, $format) =
        PVE::Storage::parse_volname($storecfg, $archive);

    die "got unexpected vtype '$vtype'\n" if $vtype ne 'backup';

    die "got unexpected backup format '$format'\n" if $format ne 'pbs-vm';

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree($tmpdir);
    mkpath($tmpdir);

    my $conffile = PVE::QemuConfig->config_file($vmid);
    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    # Note: $oldconf is undef if VM does not exists
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my $rpcenv = PVE::RPCEnvironment::get();
    my $devinfo = {}; # info about drives included in backup
    my $virtdev_hash = {}; # info about allocated drives

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        my $cfgfn = "$tmpdir/qemu-server.conf";
        my $firewall_config_fn = "$tmpdir/fw.conf";
        my $index_fn = "$tmpdir/index.json";

        my $cmd = "restore";

        my $param = [$pbs_backup_name, "index.json", $index_fn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);
        my $index = PVE::Tools::file_get_contents($index_fn);
        $index = decode_json($index);

        foreach my $info (@{$index->{files}}) {
            # the dots are literal separators of the archive name, so escape them
            if ($info->{filename} =~ m/^(drive-\S+)\.img\.fidx$/) {
                my $devname = $1;
                if ($info->{size} =~ m/^(\d+)$/) { # untaint size
                    $devinfo->{$devname}->{size} = $1;
                } else {
                    die "unable to parse file size in 'index.json' - got '$info->{size}'\n";
                }
            }
        }

        my $is_qemu_server_backup = scalar(
            grep { $_->{filename} eq 'qemu-server.conf.blob' } @{$index->{files}}
        );
        if (!$is_qemu_server_backup) {
            die "backup does not look like a qemu-server backup (missing 'qemu-server.conf' file)\n";
        }
        my $has_firewall_config = scalar(grep { $_->{filename} eq 'fw.conf.blob' } @{$index->{files}});

        $param = [$pbs_backup_name, "qemu-server.conf", $cfgfn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

        if ($has_firewall_config) {
            $param = [$pbs_backup_name, "fw.conf", $firewall_config_fn];
            PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($firewall_config_fn, "${pve_firewall_dir}/$vmid.fw");
        }

        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $storecfg, $fh, $devinfo, $options);

        # fixme: rate limit?

        # create empty/temp config
        PVE::Tools::file_set_contents($conffile, "memory: 128\nlock: create");

        $restore_cleanup_oldconf->($storecfg, $vmid, $oldconf, $virtdev_hash) if $oldconf;

        # allocate volumes
        my $map = $restore_allocate_devices->($storecfg, $virtdev_hash, $vmid);

        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            # this fails if storage is unavailable
            my $volid = $d->{volid};
            my $path = PVE::Storage::path($storecfg, $volid);

            # for live-restore we only want to preload the efidisk and TPM state
            next if $options->{live} && $virtdev ne 'efidisk0' && $virtdev ne 'tpmstate0';

            my $pbs_restore_cmd = [
                '/usr/bin/pbs-restore',
                '--repository', $repo,
                @ns_arg,
                $pbs_backup_name,
                "$d->{devname}.img.fidx",
                $path,
                '--verbose',
            ];

            push @$pbs_restore_cmd, '--format', $d->{format} if $d->{format};
            push @$pbs_restore_cmd, '--keyfile', $keyfile if -e $keyfile;

            if (PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $volid)) {
                push @$pbs_restore_cmd, '--skip-zero';
            }

            my $dbg_cmdstring = PVE::Tools::cmd2string($pbs_restore_cmd);
            print "restore proxmox backup image: $dbg_cmdstring\n";
            run_command($pbs_restore_cmd);
        }

        # second pass over the archived config: rewrite it for the new volumes
        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $options->{unique},
            );
        }

        $fh->close();
    };
    my $err = $@;

    # for a successful live-restore the volumes must stay active for the VM start
    if ($err || !$options->{live}) {
        $restore_deactivate_volumes->($storecfg, $virtdev_hash);
    }

    rmtree($tmpdir);

    if ($err) {
        $restore_destroy_volumes->($storecfg, $virtdev_hash);
        die $err;
    }

    if ($options->{live}) {
        # keep lock during live-restore
        $new_conf_raw .= "\nlock: create";
    }

    my $new_conf = $restore_merge_config->($conffile, $new_conf_raw, $options->{override_conf});
    PVE::QemuConfig->write_config($vmid, $new_conf);

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $options->{pool}) if $options->{pool};

    if ($options->{live}) {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "got signal ($!) - abort\n"; };

        my $conf = PVE::QemuConfig->load_config($vmid);
        die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf);

        # these special drives are already restored before start
        delete $devinfo->{'drive-efidisk0'};
        delete $devinfo->{'drive-tpmstate0-backup'};

        my $pbs_opts = {
            repo => $repo,
            keyfile => $keyfile,
            snapshot => $pbs_backup_name,
            namespace => $namespace,
        };
        pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $pbs_opts);

        PVE::QemuConfig->remove_lock($vmid, "create");
    }
}
7129
# Start a VM on top of a PBS snapshot and stream the data into its disks.
#
# $vmid           - guest ID
# $conf           - parsed guest config (used to look up the target drives)
# $storecfg       - storage configuration
# $restored_disks - hash ref keyed by device name ("drive-<confname>")
# $opts           - hash ref with repo, snapshot, keyfile and (optional) namespace
#
# The VM is started paused with a 'pbs-backing' map, a 'block-stream' job is
# started per disk, the VM is resumed and the jobs are awaited; afterwards the
# "<device>-pbs" block nodes are removed. On any error the VM is stopped and
# the sub dies with "live-restore failed".
sub pbs_live_restore {
    my ($vmid, $conf, $storecfg, $restored_disks, $opts) = @_;

    print "starting VM for live-restore\n";
    print "repository: '$opts->{repo}', snapshot: '$opts->{snapshot}'\n";

    my $pbs_backing = {};
    for my $ds (keys %$restored_disks) {
        # Take the capture from the match itself: reading $1 after an
        # unchecked match would silently pick up a stale or undefined value.
        my ($confname) = $ds =~ m/^drive-(.*)$/
            or die "unexpected drive name '$ds' for live-restore\n";

        $pbs_backing->{$confname} = {
            repository => $opts->{repo},
            snapshot => $opts->{snapshot},
            archive => "$ds.img.fidx",
        };
        $pbs_backing->{$confname}->{keyfile} = $opts->{keyfile} if -e $opts->{keyfile};
        $pbs_backing->{$confname}->{namespace} = $opts->{namespace} if defined($opts->{namespace});

        my $drive = parse_drive($confname, $conf->{$confname});
        print "restoring '$ds' to '$drive->{file}'\n";
    }

    eval {
        # make sure HA doesn't interrupt our restore by stopping the VM
        if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
            run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
        }

        # start VM with backing chain pointing to PBS backup, environment vars for PBS driver
        # in QEMU (PBS_PASSWORD and PBS_FINGERPRINT) are already set by our caller
        vm_start_nolock($storecfg, $vmid, $conf, {paused => 1, 'pbs-backing' => $pbs_backing}, {});

        my $qmeventd_fd = register_qmeventd_handle($vmid);

        # begin streaming, i.e. data copy from PBS to target disk for every vol,
        # this will effectively collapse the backing image chain consisting of
        # [target <- alloc-track -> PBS snapshot] to just [target] (alloc-track
        # removes itself once all backing images vanish with 'auto-remove=on')
        my $jobs = {};
        for my $ds (sort keys %$restored_disks) {
            my $job_id = "restore-$ds";
            mon_cmd($vmid, 'block-stream',
                'job-id' => $job_id,
                device => "$ds",
            );
            $jobs->{$job_id} = {};
        }

        mon_cmd($vmid, 'cont');
        qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');

        print "restore-drive jobs finished successfully, removing all tracking block devices"
            ." to disconnect from Proxmox Backup Server\n";

        for my $ds (sort keys %$restored_disks) {
            mon_cmd($vmid, 'blockdev-del', 'node-name' => "$ds-pbs");
        }

        close($qmeventd_fd);
    };

    my $err = $@;

    if ($err) {
        warn "An error occurred during live-restore: $err\n";
        _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
        die "live-restore failed\n";
    }
}
7200
# Restore a VM from a VMA archive.
#
# $archive - path of the archive, or '-' to read it from STDIN
# $vmid    - ID of the guest to restore into
# $user    - user passed on to the backup-hint parser
# $opts    - hash ref; keys read here: bwlimit, unique, override_conf, pool
#            (the whole hash is also handed to the backup-hint parser)
# $comp    - compression identifier for decompressor_info(), false if none
#
# A pipeline [cstream |] [decompressor |] `vma extract` is assembled. Once the
# extractor reports the "CTIME:" line, the volumes are allocated and the device
# map is written to the fifo given to `vma extract -r`. Dies on error after
# destroying the allocated volumes.
sub restore_vma_archive {
    my ($archive, $vmid, $user, $opts, $comp) = @_;

    # source of the next pipeline stage: the archive for the first one, '-' afterwards
    my $readfrom = $archive;

    my $cfg = PVE::Storage::config();
    my $commands = [];
    my $bwlimit = $opts->{bwlimit};

    my $dbg_cmdstring = '';
    my $append_stage = sub {
        my ($stage) = @_;
        push @$commands, $stage;
        $dbg_cmdstring .= ' | ' if length($dbg_cmdstring);
        $dbg_cmdstring .= PVE::Tools::cmd2string($stage);
        $readfrom = '-';
    };

    my $input = undef;
    if ($archive eq '-') {
        $input = '<&STDIN';
    } else {
        # If we use a backup from a PVE defined storage we also consider that
        # storage's rate limit:
        my (undef, $volid) = PVE::Storage::path_to_volume_id($cfg, $archive);
        if (defined($volid)) {
            my ($sid) = PVE::Storage::parse_volume_id($volid);
            my $readlimit = PVE::Storage::get_bandwidth_limit('restore', [$sid], $bwlimit);
            if ($readlimit) {
                print STDERR "applying read rate limit: $readlimit\n";
                $append_stage->(['cstream', '-t', $readlimit*1024, '--', $readfrom]);
            }
        }
    }

    if ($comp) {
        my $info = PVE::Storage::decompressor_info('vma', $comp);
        my $decompress = $info->{decompressor};
        push @$decompress, $readfrom;
        $append_stage->($decompress);
    }

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree($tmpdir);

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { warn "got interrupt - ignored\n"; };

    my $mapfifo = "/var/tmp/vzdumptmp$$.fifo";
    POSIX::mkfifo($mapfifo, 0600);
    my $fifofh;
    # passed to run_command() as its 'afterfork' callback
    my $openfifo = sub { open($fifofh, '>', $mapfifo) or die $! };

    $append_stage->(['vma', 'extract', '-v', '-r', $mapfifo, $readfrom, $tmpdir]);

    my $oldtimeout;
    my $timeout = 5;

    my $devinfo = {}; # info about drives included in backup
    my $virtdev_hash = {}; # info about allocated drives

    my $rpcenv = PVE::RPCEnvironment::get();

    my $conffile = PVE::QemuConfig->config_file($vmid);

    # Note: $oldconf is undef if VM does not exist
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my %storage_limits;

    # allocate the volumes, write the device map to the fifo and build the new config
    my $print_devmap = sub {
        my $cfgfn = "$tmpdir/qemu-server.conf";

        # we can read the config - that is already extracted
        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        my $fwcfgfn = "$tmpdir/qemu-server.fw";
        if (-f $fwcfgfn) {
            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($fwcfgfn, "${pve_firewall_dir}/$vmid.fw");
        }

        $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $cfg, $fh, $devinfo, $opts);

        # look up the restore limit once per target storage
        for my $info (values %$virtdev_hash) {
            my $storeid = $info->{storeid};
            next if defined($storage_limits{$storeid});

            my $limit = PVE::Storage::get_bandwidth_limit('restore', [$storeid], $bwlimit) // 0;
            print STDERR "rate limit for storage $storeid: $limit KiB/s\n" if $limit;
            $storage_limits{$storeid} = $limit * 1024;
        }

        for my $devname (keys %$devinfo) {
            next if $devinfo->{$devname}->{virtdev};
            die "found no device mapping information for device '$devname'\n";
        }

        # create empty/temp config
        if ($oldconf) {
            PVE::Tools::file_set_contents($conffile, "memory: 128\n");
            $restore_cleanup_oldconf->($cfg, $vmid, $oldconf, $virtdev_hash);
        }

        # allocate volumes
        my $map = $restore_allocate_devices->($cfg, $virtdev_hash, $vmid);

        # print restore information to $fifofh
        for my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            my ($storeid, $volid) = ($d->{storeid}, $d->{volid});

            my $map_opts = '';
            if (my $limit = $storage_limits{$storeid}) {
                $map_opts .= "throttling.bps=$limit:throttling.group=$storeid:";
            }

            my $write_zeros = PVE::Storage::volume_has_feature($cfg, 'sparseinit', $volid) ? 0 : 1;

            my $path = PVE::Storage::path($cfg, $volid);

            print {$fifofh} "${map_opts}format=$d->{format}:${write_zeros}:$d->{devname}=$path\n";

            print "map '$d->{devname}' to '$path' (write zeros = ${write_zeros})\n";
        }

        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $fh->close();
    };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };
        local $SIG{ALRM} = sub { die "got timeout\n"; };

        $oldtimeout = alarm($timeout);

        # called by run_command() for every output line of the pipeline
        my $parser = sub {
            my ($line) = @_;

            print "$line\n";

            if ($line =~ m/^DEV:\sdev_id=(\d+)\ssize:\s(\d+)\sdevname:\s(\S+)$/) {
                my ($dev_id, $size, $devname) = ($1, $2, $3);
                $devinfo->{$devname} = { size => $size, dev_id => $dev_id };
            } elsif ($line =~ m/^CTIME: /) {
                # we correctly received the vma config, so we can disable
                # the timeout now for disk allocation (set to 10 minutes, so
                # that we always timeout if something goes wrong)
                alarm(600);
                $print_devmap->();
                print {$fifofh} "done\n";
                my $restore_alarm = $oldtimeout || 0;
                $oldtimeout = undef;
                alarm($restore_alarm);
                close($fifofh);
                $fifofh = undef;
            }
        };

        print "restore vma archive: $dbg_cmdstring\n";
        run_command($commands, input => $input, outfunc => $parser, afterfork => $openfifo);
    };
    my $err = $@;

    alarm($oldtimeout) if $oldtimeout;

    $restore_deactivate_volumes->($cfg, $virtdev_hash);

    close($fifofh) if $fifofh;
    unlink $mapfifo;
    rmtree($tmpdir);

    if ($err) {
        $restore_destroy_volumes->($cfg, $virtdev_hash);
        die $err;
    }

    my $new_conf = $restore_merge_config->($conffile, $new_conf_raw, $opts->{override_conf});
    PVE::QemuConfig->write_config($vmid, $new_conf);

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $opts->{pool}) if $opts->{pool};
}
7416
# Restore a VM from a legacy tar based vzdump archive.
#
# $archive - path of the archive, or '-' to read it from STDIN
# $vmid    - ID of the guest to restore into
# $user    - exported to the extractor as VZDUMP_USER
# $opts    - hash ref; keys read here: override_conf (must be empty),
#            storage, pool, prealloc, info, unique
#
# The archive is unpacked with tar, piping every member to `qmextract`. The
# volume mapping is then read from the stat file written into the temporary
# directory and applied to the archived config, which becomes the new config.
# Dies on error after calling tar_restore_cleanup() (unless in info mode).
sub restore_tar_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if (scalar(keys $opts->{override_conf}->%*) > 0) {
        my $keystring = join(' ', keys $opts->{override_conf}->%*);
        die "cannot pass along options ($keystring) when restoring from tar archive\n";
    }

    if ($archive ne '-') {
        my $firstfile = tar_archive_read_firstfile($archive);
        die "ERROR: file '$archive' does not look like a QemuServer vzdump backup\n"
            if $firstfile ne 'qemu-server.conf';
    }

    my $storecfg = PVE::Storage::config();

    # avoid zombie disks when restoring over an existing VM -> cleanup first
    # pass keep_empty_config=1 to keep the config (thus VMID) reserved for us
    # skiplock=1 because qmrestore has set the 'create' lock itself already
    my $vmcfgfn = PVE::QemuConfig->config_file($vmid);
    destroy_vm($storecfg, $vmid, 1, { lock => 'restore' }) if -f $vmcfgfn;

    my $tocmd = "/usr/lib/qemu-server/qmextract";

    $tocmd .= " --storage " . PVE::Tools::shellquote($opts->{storage}) if $opts->{storage};
    $tocmd .= " --pool " . PVE::Tools::shellquote($opts->{pool}) if $opts->{pool};
    $tocmd .= ' --prealloc' if $opts->{prealloc};
    $tocmd .= ' --info' if $opts->{info};

    # tar option "xf" does not autodetect compression when read from STDIN,
    # so we pipe to zcat
    my $cmd = "zcat -f|tar xf " . PVE::Tools::shellquote($archive) . " " .
        PVE::Tools::shellquote("--to-command=$tocmd");

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    mkpath($tmpdir);

    local $ENV{VZDUMP_TMPDIR} = $tmpdir;
    local $ENV{VZDUMP_VMID} = $vmid;
    local $ENV{VZDUMP_USER} = $user;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    my $new_conf_raw = '';

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        if ($archive eq '-') {
            print "extracting archive from STDIN\n";
            run_command($cmd, input => "<&STDIN");
        } else {
            print "extracting archive '$archive'\n";
            run_command($cmd);
        }

        # info mode: nothing to map; this only leaves the eval block, the
        # sub itself returns further down once the cleanup is done
        return if $opts->{info};

        # read new mapping
        my $map = {};
        my $statfile = "$tmpdir/qmrestore.stat";
        if (my $fd = IO::File->new($statfile, "r")) {
            while (defined (my $line = <$fd>)) {
                if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                    $map->{$1} = $2 if $1;
                } else {
                    print STDERR "unable to parse line in statfile - $line\n";
                }
            }
            $fd->close();
        }

        my $confsrc = "$tmpdir/qemu-server.conf";

        my $srcfd = IO::File->new($confsrc, "r") || die "unable to open file '$confsrc'\n";

        my $cookie = { netcount => 0 };
        while (defined (my $line = <$srcfd>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $srcfd->close();
    };
    if (my $err = $@) {
        tar_restore_cleanup($storecfg, "$tmpdir/qmrestore.stat") if !$opts->{info};
        die $err;
    }

    rmtree($tmpdir);

    # A `return` inside eval {} does not leave the sub. Without this check an
    # info-only run would go on and overwrite the guest config with an empty
    # string.
    return if $opts->{info};

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;
}
7529
# Call $func->($storeid) once for every storage that is referenced by a
# non-CD-ROM volume in $conf, in sorted order of the storage IDs.
sub foreach_storage_used_by_vm {
    my ($conf, $func) = @_;

    my %used_storage;

    my $collect = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive);

        # noerr=1: entries that are no volume IDs simply yield no storage
        my ($sid, $volname) = PVE::Storage::parse_volume_id($drive->{file}, 1);
        $used_storage{$sid} = $sid if $sid;
    };
    PVE::QemuConfig->foreach_volume($conf, $collect);

    for my $sid (sort keys %used_storage) {
        $func->($sid);
    }
}
7549
# storage types for which QEMU itself takes the snapshot (unless krbd is set)
my $qemu_snap_storage = {
    rbd => 1,
};

# Returns 1 when snapshots of $volid have to be done by QEMU, and nothing
# otherwise. Never the case for a device ID containing 'tpmstate0'. Dies if
# the storage of $volid is not part of $storecfg.
sub do_snapshots_with_qemu {
    my ($storecfg, $volid, $deviceid) = @_;

    return if $deviceid =~ m/tpmstate0/;

    my $storage_name = PVE::Storage::parse_volume_id($volid);
    my $scfg = $storecfg->{ids}->{$storage_name};
    die "could not find storage '$storage_name'\n" if !defined($scfg);

    return 1 if $qemu_snap_storage->{$scfg->{type}} && !$scfg->{krbd};
    return 1 if $volid =~ m/\.(qcow2|qed)$/;

    return;
}
7572
# Ping the guest agent of $vmid (3 second timeout).
# Returns 1 if the 'guest-ping' command succeeded, 0 otherwise; a failure is
# reported with a warning unless $nowarn is set.
sub qga_check_running {
    my ($vmid, $nowarn) = @_;

    eval { mon_cmd($vmid, "guest-ping", timeout => 3); };
    return 1 if !$@;

    warn "QEMU Guest Agent is not running - $@" if !$nowarn;
    return 0;
}
7583
# Convert the volumes of a guest into base volumes.
#
# $vmid - guest ID, used when writing the config
# $conf - guest config; updated in place
# $disk - optional drive key; when set, only that drive is converted
#
# CD-ROM drives and volumes without the 'template' feature are skipped. The
# config is written after every converted volume.
sub template_create {
    my ($vmid, $conf, $disk) = @_;

    my $storecfg = PVE::Storage::config();

    my $convert_to_base = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);
        return if $disk && $ds ne $disk;

        my $volid = $drive->{file};
        return if !PVE::Storage::volume_has_feature($storecfg, 'template', $volid);

        $drive->{file} = PVE::Storage::vdisk_create_base($storecfg, $volid);
        $conf->{$ds} = print_drive($drive);
        PVE::QemuConfig->write_config($vmid, $conf);
    };

    PVE::QemuConfig->foreach_volume($conf, $convert_to_base);
}
7604
# Turn an "iscsi://<portal>/<target>/<lun>" path into the option string form
# (file.driver=iscsi,...) that is passed to qemu-img together with
# --image-opts / --target-image-opts. Dies if $path has a different shape.
sub convert_iscsi_path {
    my ($path) = @_;

    my ($portal, $target, $lun) = $path =~ m|^iscsi://([^/]+)/([^/]+)/(.+)$|
        or die "cannot convert iscsi path '$path', unkown format\n";

    my $initiator_name = get_initiator_name();

    return "file.driver=iscsi,file.transport=tcp,file.initiator-name=$initiator_name,".
        "file.portal=$portal,file.target=$target,file.lun=$lun,driver=raw";
}
7621
# Copy a volume with `qemu-img convert`.
#
# $src_volid           - source volume ID, or a path to a file / block device
# $dst_volid           - destination volume ID (must be a valid volume ID)
# $size                - total size, only used for the progress output
# $snapname            - optional source snapshot
# $is_zero_initialized - prefix the (non-iSCSI) target with "zeroinit:"
#
# Dies with "copy failed: ..." if the command fails.
sub qemu_img_convert {
    my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized) = @_;

    my $storecfg = PVE::Storage::config();
    my ($src_storeid, $src_volname) = PVE::Storage::parse_volume_id($src_volid, 1);
    my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid, 1);

    die "destination '$dst_volid' is not a valid volid form qemu-img convert\n" if !$dst_storeid;

    my ($cachemode, $src_path, $src_format);
    my $src_is_iscsi = 0;

    if ($src_storeid) {
        # source is a storage volume
        PVE::Storage::activate_volumes($storecfg, [$src_volid], $snapname);
        my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
        $src_format = qemu_img_format($src_scfg, $src_volname);
        $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
        $src_is_iscsi = ($src_path =~ m|^iscsi://|);
        $cachemode = 'none' if $src_scfg->{type} eq 'zfspool';
    } elsif (-f $src_volid || -b $src_volid) {
        # source is a plain path, the format is guessed from the extension
        $src_path = $src_volid;
        if ($src_path =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/) {
            $src_format = $1;
        }
    }

    die "source '$src_volid' is not a valid volid nor path for qemu-img convert\n" if !$src_path;

    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
    my $dst_format = qemu_img_format($dst_scfg, $dst_volname);
    my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
    my $dst_is_iscsi = ($dst_path =~ m|^iscsi://|);

    my @convert = ('/usr/bin/qemu-img', 'convert', '-p', '-n');
    if ($snapname && $src_format && $src_format eq "qcow2") {
        push @convert, '-l', "snapshot.name=$snapname";
    }
    push @convert, '-t', 'none' if $dst_scfg->{type} eq 'zfspool';
    push @convert, '-T', $cachemode if defined($cachemode);

    if ($src_is_iscsi) {
        push @convert, '--image-opts';
        $src_path = convert_iscsi_path($src_path);
    } elsif ($src_format) {
        push @convert, '-f', $src_format;
    }

    if ($dst_is_iscsi) {
        push @convert, '--target-image-opts';
        $dst_path = convert_iscsi_path($dst_path);
    } else {
        push @convert, '-O', $dst_format;
    }

    push @convert, $src_path;
    push @convert, (!$dst_is_iscsi && $is_zero_initialized) ? "zeroinit:$dst_path" : $dst_path;

    # translate the percentage lines of `qemu-img convert -p` into byte counts
    my $progress = sub {
        my ($line) = @_;
        return if $line !~ m/\((\S+)\/100\%\)/;

        my $percent = $1;
        my $transferred = int($size * $percent / 100);
        my $total_h = render_bytes($size, 1);
        my $transferred_h = render_bytes($transferred, 1);

        print "transferred $transferred_h of $total_h ($percent%)\n";
    };

    eval { run_command(\@convert, timeout => undef, outfunc => $progress); };
    my $err = $@;
    die "copy failed: $err" if $err;
}
7703
# Image format to use for $volname on the storage described by $scfg: the
# file extension if the storage has a 'path' and the extension matches
# $PVE::QemuServer::Drive::QEMU_FORMAT_RE, "raw" in every other case.
sub qemu_img_format {
    my ($scfg, $volname) = @_;

    return "raw" if !$scfg->{path};
    return $1 if $volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/;
    return "raw";
}
7713
# Start a 'drive-mirror' job for "drive-$drive" and monitor it.
#
# $vmid                - guest the drive belongs to
# $drive               - drive name; the QEMU device is "drive-$drive"
# $dst_volid           - target volume ID, or an "nbd:" URI used as is
# $vmiddst             - passed through to qemu_drive_mirror_monitor()
# $is_zero_initialized - prefix a volume target with "zeroinit:"
# $jobs                - hash ref of block jobs, the new job is added to it
# $completion, $qga    - passed through to qemu_drive_mirror_monitor()
# $bwlimit             - optional limit, multiplied by 1024 for 'speed'
# $src_bitmap          - optional dirty bitmap; switches to an incremental sync
#
# If the job cannot be started, all jobs in $jobs are cancelled and the sub
# dies with "mirroring error: ...".
sub qemu_drive_mirror {
    my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit, $src_bitmap) = @_;

    $jobs = {} if !$jobs;

    my $device = "drive-$drive";
    $jobs->{$device} = {};

    my ($qemu_target, $format);
    if ($dst_volid =~ /^nbd:/) {
        ($qemu_target, $format) = ($dst_volid, "nbd");
    } else {
        my $storecfg = PVE::Storage::config();
        my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid);

        my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

        $format = qemu_img_format($dst_scfg, $dst_volname);

        my $dst_path = PVE::Storage::path($storecfg, $dst_volid);

        $qemu_target = $is_zero_initialized ? "zeroinit:$dst_path" : $dst_path;
    }

    my $opts = {
        timeout => 10,
        device => $device,
        mode => "existing",
        sync => "full",
        target => $qemu_target,
    };
    $opts->{format} = $format if $format;

    if (defined($src_bitmap)) {
        $opts->{sync} = 'incremental';
        $opts->{bitmap} = $src_bitmap;
        print "drive mirror re-using dirty bitmap '$src_bitmap'\n";
    }

    if (defined($bwlimit)) {
        $opts->{speed} = $bwlimit * 1024;
        print "drive mirror is starting for drive-$drive with bandwidth limit: ${bwlimit} KB/s\n";
    } else {
        print "drive mirror is starting for drive-$drive\n";
    }

    # if a job already runs for this device we get an error, catch it for cleanup
    eval { mon_cmd($vmid, "drive-mirror", %$opts); };
    if (my $err = $@) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@;
        die "mirroring error: $err\n";
    }

    qemu_drive_mirror_monitor($vmid, $vmiddst, $jobs, $completion, $qga);
}
7765
# Poll the block jobs in $jobs once per second until they are done.
#
# $vmid       - guest running the jobs
# $vmiddst    - target guest; if set and different from $vmid, the ready jobs
#               are cancelled (disk is not switched) while the guest is
#               frozen via the agent or suspended
# $jobs       - hash ref keyed by job ID; finished jobs are removed from it
# $completion - can be either
#     'complete': wait until all jobs are ready, block-job-complete them (default)
#     'cancel': wait until all jobs are ready, block-job-cancel them
#     'skip': wait until all jobs are ready, return with block jobs in ready state
#     'auto': wait until all jobs disappear, only use for jobs which complete automatically
# $qga        - try to freeze the filesystems through the guest agent
# $op         - job type to look at in 'query-block-jobs' (default "mirror")
#
# On any error the remaining jobs are cancelled and the sub dies with
# "block job ($op) error: ...".
sub qemu_drive_mirror_monitor {
    my ($vmid, $vmiddst, $jobs, $completion, $qga, $op) = @_;

    $completion //= 'complete';
    $op //= "mirror";

    eval {
        # counts failed completion attempts, see the timeout check below
        my $err_complete = 0;

        my $starttime = time();
        while (1) {
            die "block job ('$op') timed out\n" if $err_complete > 300;

            my $stats = mon_cmd($vmid, "query-block-jobs");
            my $ctime = time();

            my $running_jobs = {};
            for my $stat (@$stats) {
                next if $stat->{type} ne $op;
                $running_jobs->{$stat->{device}} = $stat;
            }

            my $readycounter = 0;

            for my $job_id (sort keys %$jobs) {
                my $job = $running_jobs->{$job_id};

                my $vanished = !defined($job);
                my $complete = defined($jobs->{$job_id}->{complete}) && $vanished;
                if ($complete || ($vanished && $completion eq 'auto')) {
                    print "$job_id: $op-job finished\n";
                    delete $jobs->{$job_id};
                    next;
                }

                # a job that vanished without us completing it was cancelled
                die "$job_id: '$op' has been cancelled\n" if !defined($job);

                my $busy = $job->{busy};
                my $ready = $job->{ready};
                if (my $total = $job->{len}) {
                    my $transferred = $job->{offset} || 0;
                    my $remaining = $total - $transferred;
                    my $percent = sprintf "%.2f", ($transferred * 100 / $total);

                    my $duration = $ctime - $starttime;
                    my $total_h = render_bytes($total, 1);
                    my $transferred_h = render_bytes($transferred, 1);

                    my $status = sprintf(
                        "transferred $transferred_h of $total_h ($percent%%) in %s",
                        render_duration($duration),
                    );

                    if ($ready) {
                        if ($busy) {
                            $status .= ", still busy"; # shouldn't even happen? but mirror is weird
                        } else {
                            $status .= ", ready";
                        }
                    }
                    # stop repeating the status once the job was seen as ready
                    print "$job_id: $status\n" if !$jobs->{$job_id}->{ready};
                    $jobs->{$job_id}->{ready} = $ready;
                }

                $readycounter++ if $job->{ready};
            }

            last if scalar(keys %$jobs) == 0;

            if ($readycounter == scalar(keys %$jobs)) {
                print "all '$op' jobs are ready\n";

                # do the complete later (or has already been done)
                last if $completion eq 'skip' || $completion eq 'auto';

                if ($vmiddst && $vmiddst != $vmid) {
                    my $agent_running = $qga && qga_check_running($vmid);
                    if ($agent_running) {
                        print "freeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-freeze"); };
                        warn $@ if $@;
                    } else {
                        print "suspend vm\n";
                        eval { PVE::QemuServer::vm_suspend($vmid, 1); };
                        warn $@ if $@;
                    }

                    # if we clone a disk for a new target vm, we don't switch the disk
                    PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs);

                    if ($agent_running) {
                        print "unfreeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-thaw"); };
                        warn $@ if $@;
                    } else {
                        print "resume vm\n";
                        eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
                        warn $@ if $@;
                    }

                    last;
                } else {

                    for my $job_id (sort keys %$jobs) {
                        # try to switch the disk if source and destination are on the same guest
                        print "$job_id: Completing block job_id...\n";

                        # own name, so the job type in $op is not shadowed
                        my $completion_command;
                        if ($completion eq 'complete') {
                            $completion_command = 'block-job-complete';
                        } elsif ($completion eq 'cancel') {
                            $completion_command = 'block-job-cancel';
                        } else {
                            die "invalid completion value: $completion\n";
                        }

                        eval { mon_cmd($vmid, $completion_command, device => $job_id) };
                        my $err = $@;
                        if ($err && $err =~ m/cannot be completed/) {
                            print "$job_id: block job cannot be completed, trying again.\n";
                            $err_complete++;
                        } elsif ($err) {
                            # any other failure must not be reported as success
                            chomp($err);
                            die "$job_id: block job cannot be completed - $err\n";
                        } else {
                            print "$job_id: Completed successfully.\n";
                            $jobs->{$job_id}->{complete} = 1;
                        }
                    }
                }
            }
            sleep 1;
        }
    };
    my $err = $@;

    if ($err) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        die "block job ($op) error: $err";
    }
}
7907
# Cancel every block job in $jobs and wait until none of them is reported by
# 'query-block-jobs' any more. Errors of the cancel command itself are
# ignored; finished jobs are removed from $jobs.
sub qemu_blockjobs_cancel {
    my ($vmid, $jobs) = @_;

    for my $job_id (keys %$jobs) {
        print "$job_id: Cancelling block job\n";
        eval { mon_cmd($vmid, "block-job-cancel", device => $job_id); };
        $jobs->{$job_id}->{cancel} = 1;
    }

    while (1) {
        my $stats = mon_cmd($vmid, "query-block-jobs");

        my %still_running = map { ($_->{device} => $_) } @$stats;

        for my $job_id (keys %$jobs) {
            next if !defined($jobs->{$job_id}->{cancel});
            next if defined($still_running{$job_id});

            print "$job_id: Done.\n";
            delete $jobs->{$job_id};
        }

        last if scalar(keys %$jobs) == 0;

        sleep 1;
    }
}
7938
# Create a linked or full clone of a single drive and return the drive hash
# describing the new volume.
#
# $storecfg   - storage configuration passed through to PVE::Storage.
# $source     - hash reference with the keys vmid, running, drivename, drive and
#               snapname describing the drive to clone.
# $dest       - hash reference with the keys vmid, drivename, efisize, storage
#               and format describing the target.
# $full       - true for a full clone, false for a linked clone.
# $newvollist - array reference; every newly allocated volume ID is pushed onto
#               it (the caller can use it for cleanup).
# $jobs, $completion, $qga, $bwlimit - passed on to the drive-mirror helpers.
#
# Returns a deep copy of the source drive hash with 'format' removed, 'file' set
# to the new volume ID and 'size' updated if the size of the new volume could be
# queried.
sub clone_disk {
    my ($storecfg, $source, $dest, $full, $newvollist, $jobs, $completion, $qga, $bwlimit) = @_;

    my $vmid = $source->{vmid};
    my $running = $source->{running};
    my $src_drivename = $source->{drivename};
    my $drive = $source->{drive};
    my $snapname = $source->{snapname};

    my $newvmid = $dest->{vmid};
    my $dst_drivename = $dest->{drivename};
    my $efisize = $dest->{efisize};
    my $storage = $dest->{storage};
    my $format = $dest->{format};

    # a live mirror is only used for full clones of the current state of a running VM
    my $use_drive_mirror = $full && $running && $src_drivename && !$snapname;

    if ($src_drivename && $dst_drivename && $src_drivename ne $dst_drivename) {
        my @drivenames = ($src_drivename, $dst_drivename);

        die "cloning from/to EFI disk requires EFI disk\n"
            if grep { $_ eq 'efidisk0' } @drivenames;
        die "cloning from/to TPM state requires TPM state\n"
            if grep { $_ eq 'tpmstate0' } @drivenames;

        # This would lead to two device nodes in QEMU pointing to the same backing image!
        die "cannot change drive name when cloning disk from/to the same VM\n"
            if $use_drive_mirror && $vmid == $newvmid;
    }

    die "cannot move TPM state while VM is running\n"
        if $use_drive_mirror && $src_drivename eq 'tpmstate0';

    print "create " . ($full ? 'full' : 'linked') . " clone of drive ";
    if ($src_drivename) {
        print "$src_drivename ";
    }
    print "($drive->{file})\n";

    my $newvolid;

    if (!$full) {
        $newvolid = PVE::Storage::vdisk_clone($storecfg, $drive->{file}, $newvmid, $snapname);
        push @$newvollist, $newvolid;
    } else {
        my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        $storeid = $storage if $storage;

        my $dst_format = resolve_dst_disk_format($storecfg, $storeid, $volname, $format);

        my $is_cloudinit = drive_is_cloudinit($drive);

        # work out name and size of the volume to allocate
        my ($name, $size);
        if ($is_cloudinit) {
            $name = "vm-$newvmid-cloudinit";
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $name .= ".$dst_format" if $scfg->{path};
            $snapname = undef;
            $size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE;
        } elsif ($dst_drivename eq 'efidisk0') {
            $size = $efisize or die "internal error - need to specify EFI disk size\n";
        } elsif ($dst_drivename eq 'tpmstate0') {
            $dst_format = 'raw';
            $size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
        } else {
            ($size) = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
        }

        $newvolid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $newvmid, $dst_format, $name, ($size/1024)
        );
        push @$newvollist, $newvolid;

        PVE::Storage::activate_volumes($storecfg, [$newvolid]);

        if ($is_cloudinit) {
            # No data is copied for a cloud-init drive. When cloning multiple disks
            # (e.g. during clone_vm) it might be the last disk; in that case any
            # block jobs left over from previous drive-mirrors have to be completed.
            if (($completion eq 'complete') && (scalar(keys %$jobs) > 0)) {
                qemu_drive_mirror_monitor($vmid, $newvmid, $jobs, $completion, $qga);
            }
        } else {
            my $sparseinit = PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $newvolid);

            if ($use_drive_mirror) {
                qemu_drive_mirror(
                    $vmid, $src_drivename, $newvolid, $newvmid, $sparseinit, $jobs,
                    $completion, $qga, $bwlimit,
                );
            } elsif ($dst_drivename eq 'efidisk0') {
                # TODO: handle bwlimits
                # the relevant data on the efidisk may be smaller than the source
                # e.g. on RBD/ZFS, so we use dd to copy only the amount
                # that is given by the OVMF_VARS.fd
                my $src_path = PVE::Storage::path($storecfg, $drive->{file}, $snapname);
                my $dst_path = PVE::Storage::path($storecfg, $newvolid);

                my $src_format = (PVE::Storage::parse_volname($storecfg, $drive->{file}))[6];

                # better for Ceph if block size is not too small, see bug #3324
                my $bs = 1024*1024;

                my @cmd = ('qemu-img', 'dd', '-n', '-O', $dst_format);

                if ($src_format eq 'qcow2' && $snapname) {
                    die "cannot clone qcow2 EFI disk snapshot - requires QEMU >= 6.2\n"
                        if !min_version(kvm_user_version(), 6, 2);
                    push @cmd, '-l', $snapname;
                }
                push @cmd, "bs=$bs", "osize=$size", "if=$src_path", "of=$dst_path";
                run_command(\@cmd);
            } else {
                # TODO: handle bwlimits
                qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit);
            }
        }
    }

    # best effort: a failing size query simply leaves the inherited size untouched
    my ($new_size) = eval { PVE::Storage::volume_size_info($storecfg, $newvolid, 10) };

    my $disk = dclone($drive);
    delete $disk->{format};
    $disk->{file} = $newvolid;
    $disk->{size} = $new_size if defined($new_size);

    return $disk;
}
8058
# Query the monitor of VM $vmid with 'query-version' and return the QEMU
# version as a "<major>.<minor>" string.
sub get_running_qemu_version {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, "query-version");

    return sprintf('%s.%s', $res->{qemu}->{major}, $res->{qemu}->{minor});
}
8064
# Decide whether the old (non-EFI) PXE BIOS files have to be used for the given
# machine type.
#
# Returns the list ($use_old_bios_files, $machine_type), where a trailing '.pxe'
# has been stripped from the machine type. Returns nothing if no machine type
# was given.
sub qemu_use_old_bios_files {
    my ($machine_type) = @_;

    return if !$machine_type;

    # an explicit '.pxe' suffix always selects the old files
    if ($machine_type =~ m/^(\S+)\.pxe$/) {
        my $stripped_type = $1;
        return (1, $stripped_type);
    }

    my $version = extract_version($machine_type, kvm_user_version());

    # Note: kvm version < 2.4 use non-efi pxe files, and have problems when we
    # load new efi bios files on migration. So this hack is required to allow
    # live migration from qemu-2.2 to qemu-2.4, which is sometimes used when
    # upgrading from proxmox-ve-3.X to proxmox-ve 4.0
    my $use_old_bios_files = !min_version($version, 2, 4);

    return ($use_old_bios_files, $machine_type);
}
8086
# Return the size in bytes (as reported by -s) of the OVMF vars image that
# get_ovmf_files() selects for the given VM config.
#
# $conf    - VM configuration hash.
# $efidisk - optional parsed EFI disk; if not given, it is parsed from
#            $conf->{efidisk0} when that entry is set.
#
# Dies if the selected vars image is not a regular file.
sub get_efivars_size {
    my ($conf, $efidisk) = @_;

    my $arch = get_vm_arch($conf);

    if (!defined($efidisk) && $conf->{efidisk0}) {
        $efidisk = parse_drive('efidisk0', $conf->{efidisk0});
    }

    my $smm = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $ovmf_vars = (get_ovmf_files($arch, $efidisk, $smm))[1];

    if (! -f $ovmf_vars) {
        die "uefi vars image '$ovmf_vars' not found\n";
    }

    return -s $ovmf_vars;
}
8097
# Refresh the 'size' property of the efidisk0 entry in $conf with the value
# from get_efivars_size() and store the re-serialized drive string in the
# config. Does nothing if the config has no efidisk0 entry.
sub update_efidisk_size {
    my ($conf) = @_;

    if (defined($conf->{efidisk0})) {
        my $efidisk = PVE::QemuServer::parse_drive('efidisk0', $conf->{efidisk0});
        $efidisk->{size} = get_efivars_size($conf);
        $conf->{efidisk0} = print_drive($efidisk);
    }

    return;
}
8109
# Set the 'size' property of the tpmstate0 entry in $conf to the fixed TPM
# state disk size and store the re-serialized drive string in the config.
#
# Does nothing if the config has no tpmstate0 entry (same guard as
# update_efidisk_size); without it, an undefined value would be handed to
# parse_drive and a bogus drive string written back into the config.
sub update_tpmstate_size {
    my ($conf) = @_;

    return if !defined($conf->{tpmstate0});

    my $disk = PVE::QemuServer::parse_drive('tpmstate0', $conf->{tpmstate0});
    $disk->{size} = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
    $conf->{tpmstate0} = print_drive($disk);
}
8117
# Allocate a new EFI vars volume on storage $storeid for VM $vmid and fill it
# with the default OVMF vars image selected by get_ovmf_files().
#
# Returns the list ($volid, $size_kb), where the size is the one reported by
# PVE::Storage::volume_size_info divided by 1024.
#
# Dies if the default vars image does not exist.
sub create_efidisk($$$$$$$) {
    my ($storecfg, $storeid, $vmid, $fmt, $arch, $efidisk, $smm) = @_;

    my $ovmf_vars = (get_ovmf_files($arch, $efidisk, $smm))[1];
    if (! -f $ovmf_vars) {
        die "EFI vars default image not found\n";
    }

    # vdisk_alloc is given the size in kb, qemu_img_convert the size in bytes
    my $vars_size_b = -s $ovmf_vars;
    my $vars_size = PVE::Tools::convert_size($vars_size_b, 'b' => 'kb');

    my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $fmt, undef, $vars_size);
    PVE::Storage::activate_volumes($storecfg, [$volid]);

    qemu_img_convert($ovmf_vars, $volid, $vars_size_b, undef, 0);

    my ($allocated_size) = PVE::Storage::volume_size_info($storecfg, $volid, 3);

    return ($volid, $allocated_size/1024);
}
8134
# Query the iothreads of VM $vmid via 'query-iothreads' and return a hash
# reference mapping each iothread 'id' to its 'thread-id'.
sub vm_iothreads_list {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, 'query-iothreads');

    my %thread_id_of;
    $thread_id_of{ $_->{id} } = $_->{"thread-id"} for @$res;

    return \%thread_id_of;
}
8147
# Compute SCSI controller information for a drive.
#
# $conf  - VM configuration hash; only $conf->{scsihw} is read.
# $drive - drive hash; only $drive->{index} is read.
#
# Returns the list ($maxdev, $controller, $controller_prefix):
#   $maxdev            - devices per controller: 7 if no scsihw is set or it
#                        starts with 'lsi', 1 for 'virtio-scsi-single', else 256.
#   $controller        - controller number, i.e. int(index / $maxdev).
#   $controller_prefix - "virtioscsi" for 'virtio-scsi-single', else "scsihw".
sub scsihw_infos {
    my ($conf, $drive) = @_;

    my $scsihw = $conf->{scsihw};
    my $is_single = $scsihw && $scsihw eq 'virtio-scsi-single';

    my $maxdev = 256;
    if (!$scsihw || $scsihw =~ m/^lsi/) {
        $maxdev = 7;
    } elsif ($is_single) {
        $maxdev = 1;
    }

    my $controller = int($drive->{index} / $maxdev);
    my $controller_prefix = $is_single ? "virtioscsi" : "scsihw";

    return ($maxdev, $controller, $controller_prefix);
}
8168
# Determine the disk format to use on the target storage $storeid.
#
# If $format is not given, the format of the source volume (as determined by
# qemu_img_format) is used as a hint; without a source volume name the default
# format of the storage is returned directly. A format that is not among the
# valid formats of the storage is replaced by the storage default.
sub resolve_dst_disk_format {
    my ($storecfg, $storeid, $src_volname, $format) = @_;

    my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);

    if (!$format) {
        return $defFormat if !$src_volname;

        # if no target format is specified, use the source disk format as hint
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        $format = qemu_img_format($scfg, $src_volname);
    }

    # test if requested format is supported - else use default
    return (grep { $_ eq $format } @$validFormats) ? $format : $defFormat;
}
8188
# Select the storage to use for the VM state.
#
# Order of preference:
#   1. the storage configured as 'vmstatestorage' in $conf
#   2. a shared storage where the VM has at least one disk
#   3. a non-shared storage where the VM has at least one disk
#   4. the storage named 'local'
# Within 2. and 3., storages with a 'path' (file based) are preferred.
#
# NOTE: if this logic changes, please update docs & possibly gui logic
sub find_vmstate_storage {
    my ($conf, $storecfg) = @_;

    my $configured = $conf->{vmstatestorage};
    return $configured if $configured;

    my ($shared_sid, $local_sid);

    foreach_storage_used_by_vm($conf, sub {
        my ($sid) = @_;
        my $scfg = PVE::Storage::storage_config($storecfg, $sid);

        # keep the first candidate, but let any file based storage override it
        if ($scfg->{shared}) {
            $shared_sid = $sid if !$shared_sid || $scfg->{path};
        } else {
            $local_sid = $sid if !$local_sid || $scfg->{path};
        }
    });

    return $shared_sid // $local_sid // 'local';
}
8212
# Generate a new UUID with the UUID module and return it in the string form
# produced by UUID::unparse.
sub generate_uuid {
    my $binary_uuid;
    UUID::generate($binary_uuid);

    my $uuid_string;
    UUID::unparse($binary_uuid, $uuid_string);

    return $uuid_string;
}
8219
# Return a freshly generated UUID as a "uuid=<uuid>" string.
sub generate_smbios1_uuid {
    my $uuid = generate_uuid();
    return "uuid=$uuid";
}
8223
# Send the 'nbd-server-stop' monitor command to VM $vmid and return its result.
sub nbd_stop {
    my ($vmid) = @_;

    return mon_cmd($vmid, 'nbd-server-stop');
}
8229
# Create the (empty) reboot trigger file for VM $vmid below /run/qemu-server.
# Dies if the file cannot be opened for writing.
sub create_reboot_request {
    my ($vmid) = @_;

    my $fh;
    if (!open($fh, '>', "/run/qemu-server/$vmid.reboot")) {
        die "failed to create reboot trigger file: $!\n";
    }

    return close($fh);
}
8236
# Remove the reboot trigger file of VM $vmid.
#
# Returns the result of unlink(), i.e. a true value if the file was removed and
# a false value if it did not exist. Dies on any other unlink error.
sub clear_reboot_request {
    my ($vmid) = @_;

    my $removed = unlink("/run/qemu-server/$vmid.reboot");

    # a missing file just means that there was no pending request
    if (!$removed && $! != POSIX::ENOENT) {
        die "could not remove reboot request for $vmid: $!";
    }

    return $removed;
}
8248
# Translate a legacy boot specification (a string of single-letter device
# classes) into a hash reference mapping device names to boot indices.
#
# $conf    - VM configuration hash.
# $bootcfg - optional parsed boot property; its 'legacy' value is used, falling
#            back to the default of the 'legacy' entry in $boot_fmt.
#
# The n-th letter of the specification starts its boot indices at n*100:
#   'd' - every CD-ROM drive gets an index
#   'c' - only the drive named by $conf->{bootdisk} gets an index
#   'n' - every configured netX device gets an index
sub bootorder_from_legacy {
    my ($conf, $bootcfg) = @_;

    my $boot = $bootcfg->{legacy} || $boot_fmt->{legacy}->{default};

    my @letters = split(//, $boot);
    my %next_index = map { $letters[$_] => ($_ + 1) * 100 } 0 .. $#letters;

    my $bootorder = {};

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        if (drive_is_cdrom ($drive, 1)) {
            if ($next_index{d}) {
                $bootorder->{$ds} = $next_index{d};
                $next_index{d} += 1;
            }
        } elsif ($next_index{c}) {
            # the counter advances for every disk, but only the boot disk is recorded
            if ($conf->{bootdisk} && $conf->{bootdisk} eq $ds) {
                $bootorder->{$ds} = $next_index{c};
            }
            $next_index{c} += 1;
        }
    });

    if ($next_index{n}) {
        for my $net_id (0 .. $MAX_NETS - 1) {
            my $netname = "net$net_id";
            next if !$conf->{$netname};
            $bootorder->{$netname} = $next_index{n};
            $next_index{n} += 1;
        }
    }

    return $bootorder;
}
8288
# Generate default device list for 'boot: order=' property. Matches legacy
# default boot order, but with explicit device names. This is important, since
# the fallback for when neither 'order' nor the old format is specified relies
# on 'bootorder_from_legacy' above, and it would be confusing if this diverges.
#
# Returns an array reference containing, in this order and only if present: the
# first hard disk, the first CD-ROM drive and the first configured network device.
sub get_default_bootdevices {
    my ($conf) = @_;

    my @devices;

    # first harddisk (flag 0), then first cdrom (flag 1)
    for my $cdrom (0, 1) {
        my $disk = PVE::QemuServer::Drive::resolve_first_disk($conf, $cdrom);
        push @devices, $disk if $disk;
    }

    # first configured network device
    for my $net_id (0 .. $MAX_NETS - 1) {
        my $netname = "net$net_id";
        if ($conf->{$netname}) {
            push @devices, $netname;
            last;
        }
    }

    return \@devices;
}
8316
# Return a hash reference mapping device names to boot indices for the VM
# configuration $conf.
#
# Without a 'boot' property, or with one using the legacy format, the result
# comes from bootorder_from_legacy(). With an 'order' list the devices are
# numbered consecutively in list order.
sub device_bootorder {
    my ($conf) = @_;

    return bootorder_from_legacy($conf) if !defined($conf->{boot});

    my $boot = parse_property_string($boot_fmt, $conf->{boot});

    if (!defined($boot) || $boot->{legacy}) {
        return bootorder_from_legacy($conf, $boot);
    }

    my $bootorder = {};

    if ($boot->{order}) {
        # start at 100 to allow user to insert devices before us with -args
        my $index = 100;
        for my $dev (PVE::Tools::split_list($boot->{order})) {
            $bootorder->{$dev} = $index;
            $index++;
        }
    }

    return $bootorder;
}
8336
# Connect to the qmeventd socket and send the vzdump handshake for VM $vmid.
#
# A failed connection attempt is retried after 25ms as long as the error is
# EINTR or EAGAIN; the function gives up after the fifth failed attempt. Any
# other error is fatal immediately.
#
# Returns the connected socket handle; per the handshake comment below, the
# caller keeps it open while the inhibit is required and closes it afterwards.
sub register_qmeventd_handle {
    my ($vmid) = @_;

    my $peer = "/var/run/qmeventd.sock";
    my $attempts = 0;
    my $fh;

    while (1) {
        $attempts++;

        $fh = IO::Socket::UNIX->new(Peer => $peer, Blocking => 0, Timeout => 1);
        last if $fh;

        die "unable to connect to qmeventd socket (vmid: $vmid) - $!\n"
            if $! != EINTR && $! != EAGAIN;

        die "unable to connect to qmeventd socket (vmid: $vmid) - timeout after $attempts retries\n"
            if $attempts > 4;

        usleep(25000);
    }

    # send handshake to mark VM as backing up
    print {$fh} to_json({vzdump => {vmid => "$vmid"}});

    # return handle to be closed later when inhibit is no longer required
    return $fh;
}
8364
8365 # bash completion helper
8366
# Completion helper: return an array reference with the volume IDs of all
# backup archives whose format is 'vma.<compressor>'.
#
# If the value typed so far ($cvalue) starts with "<storage>:", only that
# storage is passed to PVE::Storage::template_list.
sub complete_backup_archives {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    my $storeid;
    $storeid = $1 if $cvalue =~ m/^([^:]+):/;

    my $data = PVE::Storage::template_list($cfg, $storeid, 'backup');

    my @volids;
    for my $id (keys %$data) {
        for my $item (@{$data->{$id}}) {
            next if $item->{format} !~ m/^vma\.(${\PVE::Storage::Plugin::COMPRESSOR_RE})$/;
            next if !defined($item->{volid});
            push @volids, $item->{volid};
        }
    }

    return \@volids;
}
8390
# Completion helper: return an array reference of VM IDs taken from vmstatus().
#
# $running - undef: return all IDs
#            true:  return only non-template VMs whose status is 'running'
#            false: return only non-template VMs whose status is not 'running'
my $complete_vmid_full = sub {
    my ($running) = @_;

    my $idlist = vmstatus();

    my @ids = grep {
        my $status = $idlist->{$_};
        my $keep = 1;
        if (defined($running)) {
            if ($status->{template}) {
                $keep = 0;
            } elsif ($running) {
                $keep = 0 if $status->{status} ne 'running';
            } else {
                $keep = 0 if $status->{status} eq 'running';
            }
        }
        $keep;
    } keys %$idlist;

    return \@ids;
};
8410
# Completion helper: IDs of all VMs, regardless of their state.
sub complete_vmid {
    return $complete_vmid_full->();
}
8414
# Completion helper: IDs of all non-template VMs that are not running.
sub complete_vmid_stopped {
    return $complete_vmid_full->(0);
}
8418
# Completion helper: IDs of all non-template VMs that are running.
sub complete_vmid_running {
    return $complete_vmid_full->(1);
}
8422
# Completion helper: return an array reference with the IDs of all storages
# that pass PVE::Storage::storage_check_enabled and have the 'images' content
# type configured.
sub complete_storage {

    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @storages;
    for my $sid (keys %$ids) {
        if (
            PVE::Storage::storage_check_enabled($cfg, $sid, undef, 1)
            && $ids->{$sid}->{content}->{images}
        ) {
            push @storages, $sid;
        }
    }

    return \@storages;
}
8437
# Completion helper: return an array reference with the IDs of all storages
# that pass PVE::Storage::storage_check_enabled for the target node and have
# the 'images' content type configured.
#
# The target node is read from the second element of $all_args.
sub complete_migration_storage {
    my ($cmd, $param, $current_value, $all_args) = @_;

    my $targetnode = $all_args->[1];

    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @storages;
    for my $sid (keys %$ids) {
        if (
            PVE::Storage::storage_check_enabled($cfg, $sid, $targetnode, 1)
            && $ids->{$sid}->{content}->{images}
        ) {
            push @storages, $sid;
        }
    }

    return \@storages;
}
8455
# Return a true value if 'query-status' reports the status "paused" for VM $vmid.
#
# Errors (including a failing config-exists-on-node assertion) are only warned
# about; in that case a false value is returned.
sub vm_is_paused {
    my ($vmid) = @_;

    my $qmpstatus = eval {
        PVE::QemuConfig::assert_config_exists_on_node($vmid);
        mon_cmd($vmid, "query-status");
    };
    if ($@) {
        warn "$@\n";
    }

    return $qmpstatus if !$qmpstatus;
    return $qmpstatus->{status} eq "paused";
}
8465
# Verify that the storage of volume $vol has the content type of the volume
# (as returned by PVE::Storage::parse_volname) enabled.
#
# Returns 1 on success and dies otherwise.
sub check_volume_storage_type {
    my ($storecfg, $vol) = @_;

    my ($storeid) = PVE::Storage::parse_volume_id($vol);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
    my ($vtype) = PVE::Storage::parse_volname($storecfg, $vol);

    if (!$scfg->{content}->{$vtype}) {
        die "storage '$storeid' does not support content-type '$vtype'\n";
    }

    return 1;
}
8478
# Add the MAC address of every netX device in $conf to the forwarding database
# of its bridge, using the tap interface name "tap<vmid>i<X>".
#
# Devices without a MAC address are skipped; a warning is logged for them if
# the 'learning' attribute of the bridge port reads as false.
sub add_nets_bridge_fdb {
    my ($conf, $vmid) = @_;

    for my $opt (keys %$conf) {
        my ($net_id) = $opt =~ m/^net(\d+)$/ or next;
        my $iface = "tap${vmid}i${net_id}";

        # NOTE: expect setups with learning off to *not* use auto-random-generation of MAC on start
        my $net = parse_net($conf->{$opt}, 1) or next;

        my $mac = $net->{macaddr};
        if (!$mac) {
            if (!file_read_firstline("/sys/class/net/$iface/brport/learning")) {
                log_warn("MAC learning disabled, but vNIC '$iface' has no static MAC to add to forwarding DB!");
            }
            next;
        }

        my $bridge = $net->{bridge};
        if ($have_sdn) {
            PVE::Network::SDN::Zones::add_bridge_fdb($iface, $mac, $bridge, $net->{firewall});
        } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
            PVE::Network::add_bridge_fdb($iface, $mac, $net->{firewall});
        }
    }
}
8503
# Remove the MAC address of every netX device in $conf from the forwarding
# database of its bridge, using the tap interface name "tap<vmid>i<X>".
#
# Devices that cannot be parsed or have no MAC address are skipped.
sub del_nets_bridge_fdb {
    my ($conf, $vmid) = @_;

    for my $opt (keys %$conf) {
        my ($net_id) = $opt =~ m/^net(\d+)$/ or next;
        my $iface = "tap${vmid}i${net_id}";

        my $net = parse_net($conf->{$opt}) or next;
        my $mac = $net->{macaddr} or next;

        my $bridge = $net->{bridge};
        if ($have_sdn) {
            PVE::Network::SDN::Zones::del_bridge_fdb($iface, $mac, $bridge, $net->{firewall});
        } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
            PVE::Network::del_bridge_fdb($iface, $mac, $net->{firewall});
        }
    }
}
8522
8523 1;