]> git.proxmox.com Git - qemu-server.git/blob - PVE/QemuServer.pm
tests: cfg2cmd: add a few q35 related tests
[qemu-server.git] / PVE / QemuServer.pm
1 package PVE::QemuServer;
2
3 use strict;
4 use warnings;
5
6 use Cwd 'abs_path';
7 use Digest::SHA;
8 use Fcntl ':flock';
9 use Fcntl;
10 use File::Basename;
11 use File::Copy qw(copy);
12 use File::Path;
13 use File::stat;
14 use Getopt::Long;
15 use IO::Dir;
16 use IO::File;
17 use IO::Handle;
18 use IO::Select;
19 use IO::Socket::UNIX;
20 use IPC::Open3;
21 use JSON;
22 use MIME::Base64;
23 use POSIX;
24 use Storable qw(dclone);
25 use Time::HiRes qw(gettimeofday usleep);
26 use URI::Escape;
27 use UUID;
28
29 use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file);
30 use PVE::CGroup;
31 use PVE::DataCenterConfig;
32 use PVE::Exception qw(raise raise_param_exc);
33 use PVE::Format qw(render_duration render_bytes);
34 use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
35 use PVE::INotify;
36 use PVE::JSONSchema qw(get_standard_option parse_property_string);
37 use PVE::ProcFSTools;
38 use PVE::PBSClient;
39 use PVE::RPCEnvironment;
40 use PVE::Storage;
41 use PVE::SysFSTools;
42 use PVE::Systemd;
43 use PVE::Tools qw(run_command file_read_firstline file_get_contents dir_glob_foreach get_host_arch $IPV6RE);
44
45 use PVE::QMPClient;
46 use PVE::QemuConfig;
47 use PVE::QemuServer::Helpers qw(min_version config_aware_timeout);
48 use PVE::QemuServer::Cloudinit;
49 use PVE::QemuServer::CGroup;
50 use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options);
51 use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom drive_is_read_only parse_drive print_drive);
52 use PVE::QemuServer::Machine;
53 use PVE::QemuServer::Memory;
54 use PVE::QemuServer::Monitor qw(mon_cmd);
55 use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
56 use PVE::QemuServer::USB qw(parse_usb_device);
57
# SDN support is optional: $have_sdn only becomes true when the zones module can be
# loaded, otherwise it stays undefined.
my $have_sdn = eval {
    require PVE::Network::SDN::Zones;
    1;
};
63
my $EDK2_FW_BASE = '/usr/share/pve-edk2-firmware/';

# Firmware image table, indexed by architecture and then by variant key. Every entry
# is a two-element list: [ *_CODE* image path, *_VARS* image path ].
my $OVMF = do {
    # Prefix each given file name with the firmware base directory.
    my $images = sub { return [ map { "$EDK2_FW_BASE/$_" } @_ ] };

    my $table = {
        x86_64 => {
            '4m-no-smm' => $images->('OVMF_CODE_4M.fd', 'OVMF_VARS_4M.fd'),
            '4m-no-smm-ms' => $images->('OVMF_CODE_4M.fd', 'OVMF_VARS_4M.ms.fd'),
            '4m' => $images->('OVMF_CODE_4M.secboot.fd', 'OVMF_VARS_4M.fd'),
            '4m-ms' => $images->('OVMF_CODE_4M.secboot.fd', 'OVMF_VARS_4M.ms.fd'),
            default => $images->('OVMF_CODE.fd', 'OVMF_VARS.fd'),
        },
        aarch64 => {
            default => $images->('AAVMF_CODE.fd', 'AAVMF_VARS.fd'),
        },
    };
    $table;
};
95
my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();

# Note about locking: we use flock on the config file to protect against concurrent
# actions. Additionally, there is a 'lock' setting in the config file itself. It can be
# set to 'migrate', 'backup', 'snapshot' or 'rollback', and most actions are not allowed
# while such a lock is set. This kind of lock can be ignored with the --skiplock flag.

cfs_register_file('/qemu-server/', \&parse_vm_config, \&write_vm_config);
106
# Standard option describing the location some commands save/restore state from.
PVE::JSONSchema::register_standard_option(
    'pve-qm-stateuri',
    {
        type => 'string',
        optional => 1,
        maxLength => 128,
        description => "Some command save/restore state from this location.",
    },
);

# Standard option for the machine type; the pattern lists the accepted pc/q35/virt
# spellings including optional version, '+pveN' and '.pxe' suffixes.
PVE::JSONSchema::register_standard_option(
    'pve-qemu-machine',
    {
        type => 'string',
        optional => 1,
        maxLength => 40,
        pattern => '(pc|pc(-i440fx)?-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|q35|pc-q35-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|virt(?:-\d+(\.\d+)+)?(\+pve\d+)?)',
        description => "Specifies the Qemu machine type.",
    },
);
121
122
# Translate a source storage ID through an optional storage map.
#
# $map    - undef, or a hash with optional 'entries' (source => target) and 'default' keys
# $source - the storage ID to translate
#
# Returns the explicitly mapped target if one is defined, else the map's default if it
# is set (true), else $source itself. Without a map, $source is returned unchanged.
sub map_storage {
    my ($map, $source) = @_;

    if (defined($map)) {
        my $entries = $map->{entries};
        if ($entries) {
            my $target = $entries->{$source};
            return $target if defined($target);
        }

        my $fallback = $map->{default};
        return $fallback if $fallback;
    }

    # no map, or nothing applicable in it: identity mapping
    return $source;
}
136
# Standard option for a source-to-target storage mapping ('storagepair-list' format).
PVE::JSONSchema::register_standard_option(
    'pve-targetstorage',
    {
        type => 'string',
        format => 'storagepair-list',
        optional => 1,
        description => "Mapping from source to target storages. Providing only a single storage ID maps all source storages to that storage. Providing the special value '1' will map each source storage to itself.",
    },
);
143
144 #no warnings 'redefine';
145
my $nodename_cache;

# Return the node name as reported by PVE::INotify::nodename(). The value is looked up
# once and then served from $nodename_cache for as long as it is defined.
sub nodename {
    return $nodename_cache if defined($nodename_cache);

    $nodename_cache = PVE::INotify::nodename();
    return $nodename_cache;
}
151
# Property-string format of the 'watchdog' option, registered as 'pve-qm-watchdog'.
my $watchdog_fmt = {
    model => {
        type => 'string',
        enum => ['i6300esb', 'ib700'],
        default => 'i6300esb',
        default_key => 1,
        optional => 1,
        description => "Watchdog type to emulate.",
    },
    action => {
        type => 'string',
        enum => ['reset', 'shutdown', 'poweroff', 'pause', 'debug', 'none'],
        optional => 1,
        description => "The action to perform if after activation the guest fails to poll the watchdog in time.",
    },
};
PVE::JSONSchema::register_format('pve-qm-watchdog', $watchdog_fmt);
169
# Property-string format of the 'agent' option; 'enabled' is the default key.
my $agent_fmt = {
    enabled => {
        type => 'boolean',
        default_key => 1,
        default => 0,
        description => "Enable/disable communication with a Qemu Guest Agent (QGA) running in the VM.",
    },
    fstrim_cloned_disks => {
        type => 'boolean',
        optional => 1,
        default => 0,
        description => "Run fstrim after moving a disk or migrating the VM.",
    },
    type => {
        type => 'string',
        enum => ['virtio', 'isa'],
        optional => 1,
        default => 'virtio',
        description => "Select the agent type",
    },
};
191
# Property-string format of the 'vga' option; 'type' is the default key.
my $vga_fmt = {
    type => {
        type => 'string',
        enum => [
            'cirrus', 'qxl', 'qxl2', 'qxl3', 'qxl4', 'none',
            'serial0', 'serial1', 'serial2', 'serial3',
            'std', 'virtio', 'vmware',
        ],
        default_key => 1,
        default => 'std',
        optional => 1,
        description => "Select the VGA type.",
    },
    memory => {
        type => 'integer',
        minimum => 4,
        maximum => 512,
        optional => 1,
        description => "Sets the VGA memory (in MiB). Has no effect with serial display.",
    },
};
209
# Property-string format of the 'ivshmem' option: a mandatory size and an optional name.
my $ivshmem_fmt = {
    size => {
        type => 'integer',
        description => "The size of the file in MB.",
        minimum => 1,
    },
    name => {
        type => 'string',
        format_description => 'string',
        pattern => '[a-zA-Z0-9\-]+',
        description => "The name of the file. Will be prefixed with 'pve-shm-'. Default is the VMID. Will be deleted when the VM is stopped.",
        optional => 1,
    },
};
224
# Property-string format of the 'audio0' option: emulated device plus backend driver.
my $audio_fmt = {
    device => {
        type => 'string',
        description => "Configure an audio device.",
        enum => ['ich9-intel-hda', 'intel-hda', 'AC97'],
    },
    driver => {
        type => 'string',
        description => "Driver backend for the audio device.",
        enum => [qw(spice none)],
        optional => 1,
        default => 'spice',
    },
};
239
# Property-string format of the 'spice_enhancements' option.
my $spice_enhancements_fmt = {
    foldersharing => {
        type => 'boolean',
        description => "Enable folder sharing via SPICE. Needs Spice-WebDAV daemon installed in the VM.",
        default => '0',
        optional => 1,
    },
    videostreaming => {
        type => 'string',
        description => "Enable video streaming. Uses compression for detected video streams.",
        enum => [qw(off all filter)],
        default => 'off',
        optional => 1,
    },
};
255
# Property-string format of the 'rng0' option; 'source' is the default key.
my $rng_fmt = {
    source => {
        type => 'string',
        default_key => 1,
        enum => [qw(/dev/urandom /dev/random /dev/hwrng)],
        description => join('',
            "The file on the host to gather entropy from. In most cases '/dev/urandom'",
            " should be preferred over '/dev/random' to avoid entropy-starvation issues on the",
            " host. Using urandom does *not* decrease security in any meaningful way, as it's",
            " still seeded from real entropy, and the bytes provided will most likely be mixed",
            " with real entropy on the guest as well. '/dev/hwrng' can be used to pass through",
            " a hardware RNG from the host.",
        ),
    },
    max_bytes => {
        type => 'integer',
        optional => 1,
        # default is 1 KiB/s, provides enough entropy to the guest to avoid boot-starvation
        # issues (e.g. systemd etc...) while allowing no chance of overwhelming the host,
        # provided we're reading from /dev/urandom
        default => 1024,
        description => join('',
            "Maximum bytes of entropy allowed to get injected into the guest every",
            " 'period' milliseconds. Prefer a lower value when using '/dev/random' as source. Use",
            " `0` to disable limiting (potentially dangerous!).",
        ),
    },
    period => {
        type => 'integer',
        optional => 1,
        default => 1000,
        description => join('',
            "Every 'period' milliseconds the entropy-injection quota is reset, allowing",
            " the guest to retrieve another 'max_bytes' of entropy.",
        ),
    },
};
288
# Property-string format of the 'meta' option.
my $meta_info_fmt = {
    ctime => {
        type => 'integer',
        minimum => 0,
        optional => 1,
        description => "The guest creation timestamp as UNIX epoch time",
    },
    'creation-qemu' => {
        type => 'string',
        pattern => '\d+(\.\d+)+',
        optional => 1,
        description => "The QEMU (machine) version from the time this VM was created.",
    },
};
303
# Schema of the options accepted in a VM configuration: option name => JSON schema
# property definition. Further keys (numaN, netN and the cloud-init options) are added
# to this hash by the statements that follow it in this file.
#
# The description strings end up in user-facing documentation, so adjacent string
# pieces must carry their own separating space.
my $confdesc = {
    onboot => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether a VM will be started during system bootup.",
        default => 0,
    },
    autostart => {
        optional => 1,
        type => 'boolean',
        description => "Automatic restart after crash (currently ignored).",
        default => 0,
    },
    hotplug => {
        optional => 1,
        type => 'string', format => 'pve-hotplug-features',
        description => "Selectively enable hotplug features. This is a comma separated list of"
            ." hotplug features: 'network', 'disk', 'cpu', 'memory' and 'usb'. Use '0' to disable"
            ." hotplug completely. Using '1' as value is an alias for the default `network,disk,usb`.",
        default => 'network,disk,usb',
    },
    reboot => {
        optional => 1,
        type => 'boolean',
        description => "Allow reboot. If set to '0' the VM exits on reboot.",
        default => 1,
    },
    lock => {
        optional => 1,
        type => 'string',
        description => "Lock/unlock the VM.",
        enum => [qw(backup clone create migrate rollback snapshot snapshot-delete suspending suspended)],
    },
    cpulimit => {
        optional => 1,
        type => 'number',
        description => "Limit of CPU usage.",
        verbose_description => "Limit of CPU usage.\n\nNOTE: If the computer has 2 CPUs, it has"
            ." total of '2' CPU time. Value '0' indicates no CPU limit.",
        minimum => 0,
        maximum => 128,
        default => 0,
    },
    cpuunits => {
        optional => 1,
        type => 'integer',
        description => "CPU weight for a VM, will be clamped to [1, 10000] in cgroup v2.",
        verbose_description => "CPU weight for a VM. Argument is used in the kernel fair scheduler."
            ." The larger the number is, the more CPU time this VM gets. Number is relative to"
            ." weights of all the other running VMs.",
        minimum => 2,
        maximum => 262144,
        default => 'cgroup v1: 1024, cgroup v2: 100',
    },
    memory => {
        optional => 1,
        type => 'integer',
        description => "Amount of RAM for the VM in MB. This is the maximum available memory when"
            ." you use the balloon device.",
        minimum => 16,
        default => 512,
    },
    balloon => {
        optional => 1,
        type => 'integer',
        description => "Amount of target RAM for the VM in MB. Using zero disables the balloon driver.",
        minimum => 0,
    },
    shares => {
        optional => 1,
        type => 'integer',
        description => "Amount of memory shares for auto-ballooning. The larger the number is, the"
            ." more memory this VM gets. Number is relative to weights of all other running VMs."
            ." Using zero disables auto-ballooning. Auto-ballooning is done by pvestatd.",
        minimum => 0,
        maximum => 50000,
        default => 1000,
    },
    keyboard => {
        optional => 1,
        type => 'string',
        description => "Keyboard layout for VNC server. The default is read from the"
            ." '/etc/pve/datacenter.cfg' configuration file. It should not be necessary to set it.",
        enum => PVE::Tools::kvmkeymaplist(),
        default => undef,
    },
    name => {
        optional => 1,
        type => 'string', format => 'dns-name',
        description => "Set a name for the VM. Only used on the configuration web interface.",
    },
    scsihw => {
        optional => 1,
        type => 'string',
        description => "SCSI controller model",
        enum => [qw(lsi lsi53c810 virtio-scsi-pci virtio-scsi-single megasas pvscsi)],
        default => 'lsi',
    },
    description => {
        optional => 1,
        type => 'string',
        description => "Description for the VM. Shown in the web-interface VM's summary."
            ." This is saved as comment inside the configuration file.",
        maxLength => 1024 * 8,
    },
    ostype => {
        optional => 1,
        type => 'string',
        enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 win10 win11 l24 l26 solaris)],
        description => "Specify guest operating system.",
        verbose_description => <<EODESC,
Specify guest operating system. This is used to enable special
optimization/features for specific operating systems:

[horizontal]
other;; unspecified OS
wxp;; Microsoft Windows XP
w2k;; Microsoft Windows 2000
w2k3;; Microsoft Windows 2003
w2k8;; Microsoft Windows 2008
wvista;; Microsoft Windows Vista
win7;; Microsoft Windows 7
win8;; Microsoft Windows 8/2012/2012r2
win10;; Microsoft Windows 10/2016/2019
win11;; Microsoft Windows 11/2022
l24;; Linux 2.4 Kernel
l26;; Linux 2.6 - 5.X Kernel
solaris;; Solaris/OpenSolaris/OpenIndiana kernel
EODESC
    },
    boot => {
        optional => 1,
        type => 'string', format => 'pve-qm-boot',
        description => "Specify guest boot order. Use the 'order=' sub-property as usage with no"
            ." key or 'legacy=' is deprecated.",
    },
    bootdisk => {
        optional => 1,
        type => 'string', format => 'pve-qm-bootdisk',
        description => "Enable booting from specified disk. Deprecated: Use 'boot: order=foo;bar' instead.",
        pattern => '(ide|sata|scsi|virtio)\d+',
    },
    smp => {
        optional => 1,
        type => 'integer',
        description => "The number of CPUs. Please use option -sockets instead.",
        minimum => 1,
        default => 1,
    },
    sockets => {
        optional => 1,
        type => 'integer',
        description => "The number of CPU sockets.",
        minimum => 1,
        default => 1,
    },
    cores => {
        optional => 1,
        type => 'integer',
        description => "The number of cores per socket.",
        minimum => 1,
        default => 1,
    },
    numa => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable NUMA.",
        default => 0,
    },
    hugepages => {
        optional => 1,
        type => 'string',
        description => "Enable/disable hugepages memory.",
        enum => [qw(any 2 1024)],
    },
    keephugepages => {
        optional => 1,
        type => 'boolean',
        default => 0,
        description => "Use together with hugepages. If enabled, hugepages will not be deleted"
            ." after VM shutdown and can be used for subsequent starts.",
    },
    vcpus => {
        optional => 1,
        type => 'integer',
        description => "Number of hotplugged vcpus.",
        minimum => 1,
        default => 0,
    },
    acpi => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable ACPI.",
        default => 1,
    },
    agent => {
        optional => 1,
        description => "Enable/disable communication with the Qemu Guest Agent and its properties.",
        type => 'string',
        format => $agent_fmt,
    },
    kvm => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable KVM hardware virtualization.",
        default => 1,
    },
    tdf => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable time drift fix.",
        default => 0,
    },
    localtime => {
        optional => 1,
        type => 'boolean',
        description => "Set the real time clock (RTC) to local time. This is enabled by default if"
            ." the `ostype` indicates a Microsoft Windows OS.",
    },
    freeze => {
        optional => 1,
        type => 'boolean',
        description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
    },
    vga => {
        optional => 1,
        type => 'string', format => $vga_fmt,
        description => "Configure the VGA hardware.",
        verbose_description => "Configure the VGA Hardware. If you want to use high resolution"
            ." modes (>= 1280x1024x16) you may need to increase the vga memory option. Since QEMU"
            ." 2.9 the default VGA display type is 'std' for all OS types besides some Windows"
            ." versions (XP and older) which use 'cirrus'. The 'qxl' option enables the SPICE"
            ." display server. For win* OS you can select how many independent displays you want,"
            ." Linux guests can add displays themselves.\nYou can also run without any graphic card,"
            ." using a serial device as terminal.",
    },
    watchdog => {
        optional => 1,
        type => 'string', format => 'pve-qm-watchdog',
        description => "Create a virtual hardware watchdog device.",
        verbose_description => "Create a virtual hardware watchdog device. Once enabled (by a guest"
            ." action), the watchdog must be periodically polled by an agent inside the guest or"
            ." else the watchdog will reset the guest (or execute the respective action specified)",
    },
    startdate => {
        optional => 1,
        type => 'string',
        typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
        description => "Set the initial date of the real time clock. Valid format for date are:"
            ." 'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
        pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
        default => 'now',
    },
    startup => get_standard_option('pve-startup-order'),
    template => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable Template.",
        default => 0,
    },
    args => {
        optional => 1,
        type => 'string',
        description => "Arbitrary arguments passed to kvm.",
        verbose_description => <<EODESCR,
Arbitrary arguments passed to kvm, for example:

args: -no-reboot -no-hpet

NOTE: this option is for experts only.
EODESCR
    },
    tablet => {
        optional => 1,
        type => 'boolean',
        default => 1,
        description => "Enable/disable the USB tablet device.",
        verbose_description => "Enable/disable the USB tablet device. This device is usually needed"
            ." to allow absolute mouse positioning with VNC. Else the mouse runs out of sync with"
            ." normal VNC clients. If you're running lots of console-only guests on one host, you"
            ." may consider disabling this to save some context switches. This is turned off by"
            ." default if you use spice (`qm set <vmid> --vga qxl`).",
    },
    migrate_speed => {
        optional => 1,
        type => 'integer',
        description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
        minimum => 0,
        default => 0,
    },
    migrate_downtime => {
        optional => 1,
        type => 'number',
        description => "Set maximum tolerated downtime (in seconds) for migrations.",
        minimum => 0,
        default => 0.1,
    },
    cdrom => {
        optional => 1,
        type => 'string', format => 'pve-qm-ide',
        typetext => '<volume>',
        description => "This is an alias for option -ide2",
    },
    cpu => {
        optional => 1,
        description => "Emulated CPU type.",
        type => 'string',
        format => 'pve-vm-cpu-conf',
    },
    parent => get_standard_option('pve-snapshot-name', {
        optional => 1,
        description => "Parent snapshot name. This is used internally, and should not be modified.",
    }),
    snaptime => {
        optional => 1,
        description => "Timestamp for snapshots.",
        type => 'integer',
        minimum => 0,
    },
    vmstate => {
        optional => 1,
        type => 'string', format => 'pve-volume-id',
        description => "Reference to a volume which stores the VM state. This is used internally"
            ." for snapshots.",
    },
    vmstatestorage => get_standard_option('pve-storage-id', {
        description => "Default storage for VM state volumes/files.",
        optional => 1,
    }),
    runningmachine => get_standard_option('pve-qemu-machine', {
        description => "Specifies the QEMU machine type of the running vm. This is used internally"
            ." for snapshots.",
    }),
    runningcpu => {
        description => "Specifies the QEMU '-cpu' parameter of the running vm. This is used"
            ." internally for snapshots.",
        optional => 1,
        type => 'string',
        pattern => $PVE::QemuServer::CPUConfig::qemu_cmdline_cpu_re,
        format_description => 'QEMU -cpu parameter'
    },
    machine => get_standard_option('pve-qemu-machine'),
    arch => {
        description => "Virtual processor architecture. Defaults to the host.",
        optional => 1,
        type => 'string',
        enum => [qw(x86_64 aarch64)],
    },
    smbios1 => {
        description => "Specify SMBIOS type 1 fields.",
        type => 'string', format => 'pve-qm-smbios1',
        maxLength => 512,
        optional => 1,
    },
    protection => {
        optional => 1,
        type => 'boolean',
        description => "Sets the protection flag of the VM. This will disable the remove VM and"
            ." remove disk operations.",
        default => 0,
    },
    bios => {
        optional => 1,
        type => 'string',
        enum => [ qw(seabios ovmf) ],
        description => "Select BIOS implementation.",
        default => 'seabios',
    },
    vmgenid => {
        type => 'string',
        pattern => '(?:[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}|[01])',
        format_description => 'UUID',
        description => "Set VM Generation ID. Use '1' to autogenerate on create or update, pass '0'"
            ." to disable explicitly.",
        verbose_description => "The VM generation ID (vmgenid) device exposes a 128-bit integer"
            ." value identifier to the guest OS. This allows to notify the guest operating system"
            ." when the virtual machine is executed with a different configuration (e.g. snapshot"
            ." execution or creation from a template). The guest operating system notices the"
            ." change, and is then able to react as appropriate by marking its copies of"
            ." distributed databases as dirty, re-initializing its random number generator, etc.\n"
            ."Note that auto-creation only works when done through API/CLI create or update methods"
            .", but not when manually editing the config file.",
        default => "1 (autogenerated)",
        optional => 1,
    },
    hookscript => {
        type => 'string',
        format => 'pve-volume-id',
        optional => 1,
        description => "Script that will be executed during various steps in the vms lifetime.",
    },
    ivshmem => {
        type => 'string',
        format => $ivshmem_fmt,
        description => "Inter-VM shared memory. Useful for direct communication between VMs, or to"
            ." the host.",
        optional => 1,
    },
    audio0 => {
        type => 'string',
        format => $audio_fmt,
        description => "Configure an audio device, useful in combination with QXL/Spice.",
        optional => 1
    },
    spice_enhancements => {
        type => 'string',
        format => $spice_enhancements_fmt,
        description => "Configure additional enhancements for SPICE.",
        optional => 1
    },
    tags => {
        type => 'string', format => 'pve-tag-list',
        description => 'Tags of the VM. This is only meta information.',
        optional => 1,
    },
    rng0 => {
        type => 'string',
        format => $rng_fmt,
        description => "Configure a VirtIO-based Random Number Generator.",
        optional => 1,
    },
    meta => {
        type => 'string',
        format => $meta_info_fmt,
        description => "Some (read-only) meta-information about this guest.",
        optional => 1,
    },
};
732
# Property-string format of the 'cicustom' option: one optional volume per cloud-init
# data kind (meta, network, user, vendor).
#
# Each description is built from single-quoted pieces only. Mixing quote styles between
# the pieces would embed the quote characters and the concatenation dot in the text.
my $cicustom_fmt = {
    meta => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all meta data passed to the VM via'
            .' cloud-init. This is provider specific meaning configdrive2 and nocloud differ.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    network => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all network data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    user => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all user data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    vendor => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all vendor data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
};
# Make the cicustom property-string format available under the name 'pve-qm-cicustom'.
PVE::JSONSchema::register_format(
    'pve-qm-cicustom',
    $cicustom_fmt,
);
768
# Schema of the cloud-init related VM options (option name => JSON schema property).
#
# All pieces of a concatenated description use the same quote style. Mixing them would
# put the quote characters and the concatenation dot into the user-visible text.
my $confdesc_cloudinit = {
    citype => {
        optional => 1,
        type => 'string',
        description => 'Specifies the cloud-init configuration format. The default depends on the'
            .' configured operating system type (`ostype`). We use the `nocloud` format for Linux,'
            .' and `configdrive2` for windows.',
        enum => ['configdrive2', 'nocloud', 'opennebula'],
    },
    ciuser => {
        optional => 1,
        type => 'string',
        description => "cloud-init: User name to change ssh keys and password for instead of the"
            ." image's configured default user.",
    },
    cipassword => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Password to assign the user. Using this is generally not'
            .' recommended. Use ssh keys instead. Also note that older cloud-init versions do not'
            .' support hashed passwords.',
    },
    cicustom => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Specify custom files to replace the automatically generated'
            .' ones at start.',
        format => 'pve-qm-cicustom',
    },
    searchdomain => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Sets DNS search domains for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    nameserver => {
        optional => 1,
        type => 'string', format => 'address-list',
        description => 'cloud-init: Sets DNS server IP address for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    sshkeys => {
        optional => 1,
        type => 'string',
        format => 'urlencoded',
        description => "cloud-init: Setup public SSH keys (one key per line, OpenSSH format).",
    },
};
819
820 # what about other qemu settings ?
821 #cpu => 'string',
822 #machine => 'string',
823 #fda => 'file',
824 #fdb => 'file',
825 #mtdblock => 'file',
826 #sd => 'file',
827 #pflash => 'file',
828 #snapshot => 'bool',
829 #bootp => 'file',
830 ##tftp => 'dir',
831 ##smb => 'dir',
832 #kernel => 'file',
833 #append => 'string',
834 #initrd => 'file',
835 ##soundhw => 'string',
836
# Expose every option present in $confdesc at this point as standard option 'pve-qm-<name>'.
for my $opt (keys %$confdesc) {
    PVE::JSONSchema::register_standard_option("pve-qm-$opt", $confdesc->{$opt});
}
840
# Upper bounds for the number of indexed devices/entries of each kind.
my ($MAX_USB_DEVICES, $MAX_NETS, $MAX_SERIAL_PORTS, $MAX_PARALLEL_PORTS, $MAX_NUMA) =
    (5, 32, 4, 3, 8);
846
# Property-string format of a 'numaN' option, registered as format 'pve-qm-numanode'.
my $numa_fmt = {
    cpus => {
        type => 'string',
        format_description => "id[-id];...",
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        description => "CPUs accessing this NUMA node.",
    },
    memory => {
        type => 'number',
        optional => 1,
        description => "Amount of memory this NUMA node provides.",
    },
    hostnodes => {
        type => 'string',
        format_description => "id[-id];...",
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        optional => 1,
        description => "Host NUMA nodes to use.",
    },
    policy => {
        type => 'string',
        enum => ['preferred', 'bind', 'interleave'],
        optional => 1,
        description => "NUMA allocation policy.",
    },
};
PVE::JSONSchema::register_format('pve-qm-numanode', $numa_fmt);

# Option schema shared by all numaN entries, also registered as a standard option.
my $numadesc = {
    type => 'string',
    format => $numa_fmt,
    optional => 1,
    description => "NUMA topology.",
};
PVE::JSONSchema::register_standard_option("pve-qm-numanode", $numadesc);

# numa0 .. numa<$MAX_NUMA - 1> all point at the same schema hash.
$confdesc->{"numa$_"} = $numadesc for 0 .. $MAX_NUMA - 1;
884
# NIC model names; also used as the enum of the network 'model' property.
my $nic_model_list = [qw(
    e1000
    e1000-82540em
    e1000-82544gc
    e1000-82545em
    e1000e
    i82551
    i82557b
    i82559er
    ne2k_isa
    ne2k_pci
    pcnet
    rtl8139
    virtio
    vmxnet3
)];

# Space separated, string-sorted rendering of the model list.
my $nic_model_list_txt = join(' ', sort(@{$nic_model_list}));
902
# Description text of the network 'bridge' property (plain text, nothing to interpolate).
my $net_fmt_bridge_descr = <<'__EOD__';
Bridge to attach the network device to. The Proxmox VE standard bridge
is called 'vmbr0'.

If you do not specify a bridge, we create a kvm user (NATed) network
device, which provides DHCP and DNS services. The following addresses
are used:

10.0.2.2 Gateway
10.0.2.3 DNS Server
10.0.2.4 SMB Server

The DHCP server assign addresses to the guest starting from 10.0.2.15.
__EOD__
917
# Property-string format of a 'netN' option. 'model' is the default key. In addition,
# every NIC model name is accepted as a key that aliases 'model', with its value stored
# as 'macaddr'.
my $net_fmt = {
    macaddr => get_standard_option('mac-addr', {
        description => "MAC address. That address must be unique within your network. This is"
            ." automatically generated if not specified.",
    }),
    model => {
        type => 'string',
        description => "Network Card Model. The 'virtio' model provides the best performance with"
            ." very low CPU overhead. If your guest does not support this driver, it is usually"
            ." best to use 'e1000'.",
        enum => $nic_model_list,
        default_key => 1,
    },
    # the parentheses make sure the inner braces are parsed as a block returning a pair
    (map { ($_ => { keyAlias => 'model', alias => 'macaddr' }) } @$nic_model_list),
    bridge => {
        type => 'string',
        description => $net_fmt_bridge_descr,
        format_description => 'bridge',
        pattern => '[-_.\w\d]+',
        optional => 1,
    },
    queues => {
        type => 'integer',
        minimum => 0, maximum => 16,
        description => 'Number of packet queues to be used on the device.',
        optional => 1,
    },
    rate => {
        type => 'number',
        minimum => 0,
        description => "Rate limit in mbps (megabytes per second) as floating point number.",
        optional => 1,
    },
    tag => {
        type => 'integer',
        minimum => 1, maximum => 4094,
        description => 'VLAN tag to apply to packets on this interface.',
        optional => 1,
    },
    trunks => {
        type => 'string',
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        description => 'VLAN trunks to pass through this interface.',
        format_description => 'vlanid[;vlanid...]',
        optional => 1,
    },
    firewall => {
        type => 'boolean',
        description => 'Whether this interface should be protected by the firewall.',
        optional => 1,
    },
    link_down => {
        type => 'boolean',
        description => 'Whether this interface should be disconnected (like pulling the plug).',
        optional => 1,
    },
    mtu => {
        type => 'integer',
        minimum => 1, maximum => 65520,
        description => "Force MTU, for VirtIO only. Set to '1' to use the bridge MTU",
        optional => 1,
    },
};

# Option schema shared by all netN entries, also registered as standard option 'pve-qm-net'.
my $netdesc = {
    optional => 1,
    type => 'string', format => $net_fmt,
    description => "Specify network devices.",
};

PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
989
# Property-string format of an 'ipconfigN' option, registered as 'pve-qm-ipconfig'.
# Each gateway key requires the address key of the same IP family.
my $ipconfig_fmt = {
    ip => {
        type => 'string',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        default => 'dhcp',
        optional => 1,
        description => 'IPv4 address in CIDR format.',
    },
    gw => {
        type => 'string',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        requires => 'ip',
        optional => 1,
        description => 'Default gateway for IPv4 traffic.',
    },
    ip6 => {
        type => 'string',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        default => 'dhcp',
        optional => 1,
        description => 'IPv6 address in CIDR format.',
    },
    gw6 => {
        type => 'string',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        requires => 'ip6',
        optional => 1,
        description => 'Default gateway for IPv6 traffic.',
    },
};
PVE::JSONSchema::register_format('pve-qm-ipconfig', $ipconfig_fmt);
# Schema entry used for every ipconfigN option: an optional property string in
# the 'pve-qm-ipconfig' format.
my $ipconfigdesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-ipconfig',
    description => <<'EODESCR',
cloud-init: Specify IP addresses and gateways for the corresponding interface.

IP addresses use CIDR notation, gateways are optional but need an IP of the same type specified.

The special string 'dhcp' can be used for IP addresses to use DHCP, in which case no explicit
gateway should be provided.
For IPv6 the special string 'auto' can be used to use stateless autoconfiguration. This requires
cloud-init 19.4 or newer.

If cloud-init is enabled and neither an IPv4 nor an IPv6 address is specified, it defaults to using
dhcp on IPv4.
EODESCR
};
# The standard option must carry the ipconfig description; it used to be
# registered with the network device description ($netdesc) by mistake.
PVE::JSONSchema::register_standard_option("pve-qm-ipconfig", $ipconfigdesc);
1043
# One netN and one ipconfigN option per supported network device slot.
foreach my $idx (0 .. $MAX_NETS - 1) {
    $confdesc->{"net$idx"} = $netdesc;
    $confdesc_cloudinit->{"ipconfig$idx"} = $ipconfigdesc;
}

# Every cloud-init option is also a regular VM config option.
$confdesc->{$_} = $confdesc_cloudinit->{$_} for keys %$confdesc_cloudinit;
1052
PVE::JSONSchema::register_format('pve-volume-id-or-qm-path', \&verify_volume_id_or_qm_path);
# Format verifier: accepts the keywords 'none' and 'cdrom', any string starting
# with '/', or anything the 'pve-volume-id' format accepts.
# Returns the (checked) value; on failure dies, or returns nothing if $noerr is set.
sub verify_volume_id_or_qm_path {
    my ($volid, $noerr) = @_;

    # keywords and absolute paths are passed through verbatim
    return $volid if $volid eq 'none' || $volid eq 'cdrom' || $volid =~ m|^/|;

    # if its neither 'none' nor 'cdrom' nor a path, check if its a volume-id
    my $checked = eval { PVE::JSONSchema::check_format('pve-volume-id', $volid, '') };
    if (my $err = $@) {
        return if $noerr;
        die $err;
    }

    return $checked;
}
1069
# Property-string schema for the usbN options.
my $usb_fmt = {
    host => {
        default_key => 1,
        type => 'string', format => 'pve-qm-usb-device',
        format_description => 'HOSTUSBDEVICE|spice',
        description => <<EODESCR,
The Host USB device or port or the value 'spice'. HOSTUSBDEVICE syntax is:

'bus-port(.port)*' (decimal numbers) or
'vendor_id:product_id' (hexadecimal numbers) or
'spice'

You can use the 'lsusb -t' command to list existing usb devices.

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

The value 'spice' can be used to add a usb redirection device for spice.
EODESCR
    },
    usb3 => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether the given host option is a USB3 device or port.",
        default => 0,
    },
};

# Schema entry used for every usbN option.
my $usbdesc = {
    optional => 1,
    type => 'string', format => $usb_fmt,
    description => "Configure a USB device (n is 0 to 4).",
};
PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc);
1104
# Schema entry used for every serialN option: either a host device path below
# /dev/ or the keyword 'socket'.
my $serialdesc = {
    type => 'string',
    pattern => '(/dev/.+|socket)',
    optional => 1,
    description => "Create a serial device inside the VM (n is 0 to 3)",
    verbose_description => <<EODESCR,
Create a serial device inside the VM (n is 0 to 3), and pass through a
host serial device (i.e. /dev/ttyS0), or create a unix socket on the
host side (use 'qm terminal' to open a terminal connection).

NOTE: If you pass through a host serial device, it is no longer possible to migrate such machines -
use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1121
# Schema entry used for every parallelN option: a /dev/parportN or
# /dev/usb/lpN host device path.
my $paralleldesc = {
    type => 'string',
    pattern => '/dev/parport\d+|/dev/usb/lp\d+',
    optional => 1,
    description => "Map host parallel devices (n is 0 to 2).",
    verbose_description => <<EODESCR,
Map host parallel devices (n is 0 to 2).

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1136
# Add the indexed device options (parallelN, serialN, hostpciN, all drive keys,
# usbN) to the VM config schema.
$confdesc->{"parallel$_"} = $paralleldesc for 0 .. $MAX_PARALLEL_PORTS - 1;

$confdesc->{"serial$_"} = $serialdesc for 0 .. $MAX_SERIAL_PORTS - 1;

$confdesc->{"hostpci$_"} = $PVE::QemuServer::PCI::hostpcidesc
    for 0 .. $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES - 1;

foreach my $drive_key (keys %{$PVE::QemuServer::Drive::drivedesc_hash}) {
    $confdesc->{$drive_key} = $PVE::QemuServer::Drive::drivedesc_hash->{$drive_key};
}

$confdesc->{"usb$_"} = $usbdesc for 0 .. $MAX_USB_DEVICES - 1;
1156
# Property-string schema for the 'boot' option: the deprecated 'legacy' letter
# list (default key) and the newer explicit 'order' device list.
my $boot_fmt = {
    legacy => {
        type => 'string',
        pattern => '[acdn]{1,4}',
        format_description => "[acdn]{1,4}",
        description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n)."
            . " Deprecated, use 'order=' instead.",
        default_key => 1,
        optional => 1,

        # note: this is also the fallback if boot: is not given at all
        default => 'cdn',
    },
    order => {
        type => 'string',
        format => 'pve-qm-bootdev-list',
        format_description => "device[;device...]",
        optional => 1,
        description => <<EODESC,
The guest will attempt to boot from devices in the order they appear here.

Disks, optical drives and passed-through storage USB devices will be directly
booted from, NICs will load PXE, and PCIe devices will either behave like disks
(e.g. NVMe) or load an option ROM (e.g. RAID controller, hardware NIC).

Note that only devices in this list will be marked as bootable and thus loaded
by the guest firmware (BIOS/UEFI). If you require multiple disks for booting
(e.g. software-raid), you need to specify all of them here.

Overrides the deprecated 'legacy=[acdn]*' value when given.
EODESC
    },
};
PVE::JSONSchema::register_format('pve-qm-boot', $boot_fmt);
1191
PVE::JSONSchema::register_format('pve-qm-bootdev', \&verify_bootdev);
# Format verifier for a single boot device name.
# Accepts valid drive names (except efidisk*/tpmstate*) and netN/usbN/hostpciN
# keys that exist in the config schema. Returns the name on success; otherwise
# dies, or returns nothing if $noerr is set.
sub verify_bootdev {
    my ($dev, $noerr) = @_;

    # efidisk/tpmstate drive names are excluded from the drive shortcut below
    my $is_state_disk = $dev =~ m/^efidisk/ || $dev =~ m/^tpmstate/;
    return $dev if PVE::QemuServer::Drive::is_valid_drivename($dev) && !$is_state_disk;

    # non-drive devices: must look like '<prefix>N' and be a known config key
    foreach my $prefix (qw(net usb hostpci)) {
        next if $dev !~ m/^$prefix\d+$/;
        next if !$confdesc->{$dev};
        return $dev;
    }

    return if $noerr;
    die "invalid boot device '$dev'\n";
}
1213
# Render a list (array ref) of boot devices as a 'boot' property string using
# the 'order' key. An empty list yields the empty string.
sub print_bootorder {
    my ($devs) = @_;

    return "" unless scalar(@$devs);

    my $order = join(';', @$devs);
    return PVE::JSONSchema::print_property_string({ order => $order }, $boot_fmt);
}
1220
# cached result of the KVM_GET_API_VERSION ioctl (0 = not queried yet)
my $kvm_api_version = 0;

# Return the KVM API version reported by /dev/kvm. The value is cached once it
# is true. Returns nothing if /dev/kvm cannot be opened.
sub kvm_version {
    if (!$kvm_api_version) {
        open(my $kvm_fh, '<', '/dev/kvm') or return;

        # 0xae00 => KVM_GET_API_VERSION
        $kvm_api_version = ioctl($kvm_fh, 0xae00, 0);
        close($kvm_fh);
    }

    return $kvm_api_version;
}
1234
# per-binary cache: parsed version string and the binary's mtime it was read at
my $kvm_user_version = {};
my $kvm_mtime = {};

# Return the version of the given QEMU binary as printed by '<binary> --version'
# (e.g. '6.1.0'), or 'unknown' if the output could not be parsed.
# $binary defaults to the command for the host architecture. The result is
# cached per binary and refreshed whenever the binary's mtime changes.
# Dies if the binary cannot be stat()ed.
sub kvm_user_version {
    my ($binary) = @_;

    $binary //= get_command_for_arch(get_host_arch()); # get the native arch by default

    # File::stat's stat() returns undef on failure; fail with a clear message
    # instead of calling a method on undef
    my $st = stat($binary) or die "unable to stat QEMU binary '$binary' - $!\n";
    my $mtime = $st->mtime;

    my $cachedmtime = $kvm_mtime->{$binary} // -1;
    return $kvm_user_version->{$binary}
        if $kvm_user_version->{$binary} && $cachedmtime == $mtime;

    $kvm_user_version->{$binary} = 'unknown';
    $kvm_mtime->{$binary} = $mtime;

    my $code = sub {
        my $line = shift;
        if ($line =~ m/^QEMU( PC)? emulator version (\d+\.\d+(\.\d+)?)(\.\d+)?[,\s]/) {
            $kvm_user_version->{$binary} = $2;
        }
    };

    # best effort: a failing binary only results in a warning and 'unknown'
    eval { run_command([$binary, '--version'], outfunc => $code); };
    warn $@ if $@;

    return $kvm_user_version->{$binary};
}
# File-private wrapper around PVE::QemuServer::Machine::extract_version() that
# falls back to kvm_user_version() when no version is passed.
my sub extract_version {
    my ($machine_type, $version) = @_;

    $version //= kvm_user_version();

    return PVE::QemuServer::Machine::extract_version($machine_type, $version);
}
1269
# True if the character device /dev/vhost-net exists.
sub kernel_has_vhost_net {
    my $vhost_dev = '/dev/vhost-net';
    return -c $vhost_dev;
}
1273
# True if $key is a defined entry of the VM config schema ($confdesc).
sub option_exists {
    my ($key) = @_;

    my $desc = $confdesc->{$key};
    return defined($desc);
}
1278
# cached result of get_cdrom_path()
my $cdrom_path;

# Return the first of /dev/cdrom, /dev/cdrom1, /dev/cdrom2 that is a symlink.
# The result is cached. Returns nothing (undef / empty list) if none exists;
# previously the sub fell off its end and leaked the value of the last failed
# file test.
sub get_cdrom_path {
    return $cdrom_path if $cdrom_path;

    foreach my $candidate (qw(/dev/cdrom /dev/cdrom1 /dev/cdrom2)) {
        return $cdrom_path = $candidate if -l $candidate;
    }

    return;
}
1288
# Map a CD-ROM drive's file value to a path:
#   'cdrom'        -> result of get_cdrom_path()
#   'none'         -> empty string
#   absolute path  -> returned unchanged
#   anything else  -> resolved through PVE::Storage::path()
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';
    return '' if $cdrom eq 'none';
    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path($storecfg, $cdrom);
}
1302
# try to convert old style file names to volume IDs
#
# 'none', 'cdrom', /dev/... paths and anything already looking like
# 'storage:volume' are returned unchanged. Other names containing a '/' are
# rejected (nothing is returned). Plain names become 'local:iso/<name>' for
# cdrom media and 'local:<vmid>/<name>' otherwise.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    return $file if $file eq 'none' || $file eq 'cdrom';
    return $file if $file =~ m|^/dev/.+| || $file =~ m/^([^:]+):(.+)$/;

    return if $file =~ m|/|;

    my $is_cdrom = $media && $media eq 'cdrom';
    return $is_cdrom ? "local:iso/$file" : "local:$vmid/$file";
}
1321
# Check that the storage content type $vtype fits the drive's $media setting
# ('disk' expects 'images', 'cdrom' expects 'iso'). Does nothing when $media is
# not set. Raises a parameter exception for $opt on mismatch and dies with
# "internal error" for any other media value.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my %expected_type_for = (
        disk => 'images',
        cdrom => 'iso',
    );
    my $etype = $expected_type_for{$media} // die "internal error";

    return if ($vtype eq $etype);

    raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
}
1340
# Normalize $drive->{file} in place: a value that is not 'cdrom'/'none', not a
# /dev/ path, not already 'storage:volume' and not purely numeric is looked up
# via PVE::Storage::path_to_volume_id() and replaced by the volume ID. Raises a
# parameter exception for $opt if no storage matches. Also defaults
# $drive->{media} to 'cdrom' for iso volumes and for 'cdrom'/'none'.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    # try to convert filesystem paths to volume IDs

    my $file = $drive->{file};
    my $is_fs_path = ($file !~ m/^(cdrom|none)$/)
        && ($file !~ m|^/dev/.+|)
        && ($file !~ m/^([^:]+):(.+)$/)
        && ($file !~ m/^\d+$/);

    if ($is_fs_path) {
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $file);
        if (!$vtype) {
            raise_param_exc({ $opt => "unable to associate path '$file' to any storage"});
        }
        $drive->{media} = 'cdrom' if !$drive->{media} && $vtype eq 'iso';
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    $drive->{media} = 'cdrom' if !$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/;
}
1360
# Parse the 'hotplug' option into a hash ref { feature => 1, ... }.
# '0' yields an empty hash, '1' is replaced by the schema default list.
# Dies on any feature other than network, disk, cpu, memory or usb.
sub parse_hotplug_features {
    my ($data) = @_;

    my $enabled = {};
    return $enabled if $data eq '0';

    if ($data eq '1') {
        $data = $confdesc->{hotplug}->{default};
    }

    for my $feature (PVE::Tools::split_list($data)) {
        my ($name) = $feature =~ m/^(network|disk|cpu|memory|usb)$/
            or die "invalid hotplug feature '$feature'\n";
        $enabled->{$name} = 1;
    }

    return $enabled;
}
1379
PVE::JSONSchema::register_format('pve-hotplug-features', \&pve_verify_hotplug_features);
# Format verifier for the 'hotplug' option.
# Returns $value if parse_hotplug_features() accepts it. Otherwise returns
# nothing when $noerr is set, or dies with the parser's error message.
sub pve_verify_hotplug_features {
    my ($value, $noerr) = @_;

    # the parser dies on invalid input; catch that so $noerr is honored
    my $parsed = eval { parse_hotplug_features($value) };
    my $err = $@;

    return $value if $parsed;

    return if $noerr;

    die $err if $err;
    die "unable to parse hotplug option\n";
}
1390
# Send a 36 byte SCSI command with opcode 0x12 through the SG_IO ioctl on the
# open handle $fh and decode the response.
# Returns a hash ref with the keys type, removable, vendor, product and
# revision. On any failure dies, or returns nothing if $noerr is set.
sub scsi_inquiry {
    my($fh, $noerr) = @_;

    my $SG_IO = 0x2285;
    my $SG_GET_VERSION_NUM = 0x2282;

    my $versionbuf = "\x00" x 8;
    my $ret = ioctl($fh, $SG_GET_VERSION_NUM, $versionbuf);
    if (!$ret) {
        die "scsi ioctl SG_GET_VERSION_NUM failed - $!\n" if !$noerr;
        return;
    }
    my $version = unpack("I", $versionbuf);
    if ($version < 30000) {
        die "scsi generic interface too old\n" if !$noerr;
        return;
    }

    my $buf = "\x00" x 36;
    my $sensebuf = "\x00" x 8;
    # command block: opcode 0x12, allocation length 36
    my $cmd = pack("C x3 C x1", 0x12, 36);

    # see /usr/include/scsi/sg.h
    my $sg_io_hdr_t = "i i C C s I P P P I I i P C C C C S S i I I";

    my $packet = pack(
        $sg_io_hdr_t, ord('S'), -3, length($cmd), length($sensebuf), 0, length($buf), $buf, $cmd, $sensebuf, 6000
    );

    $ret = ioctl($fh, $SG_IO, $packet);
    if (!$ret) {
        die "scsi ioctl SG_IO failed - $!\n" if !$noerr;
        return;
    }

    # fields 17 and 18 of the header template are the two 'S' (unsigned short)
    # status fields; any non-zero value is treated as an error
    my @res = unpack($sg_io_hdr_t, $packet);
    if ($res[17] || $res[18]) {
        die "scsi ioctl SG_IO status error - $!\n" if !$noerr;
        return;
    }

    my $res = {};
    $res->@{qw(type removable vendor product revision)} = unpack("C C x6 A8 A16 A4", $buf);

    # reduce 'removable' to a boolean (bit 7) and 'type' to its low 5 bits
    $res->{removable} = $res->{removable} & 128 ? 1 : 0;
    $res->{type} &= 0x1F;

    return $res;
}
1440
# Open $path read/write and run scsi_inquiry() on it in no-error mode.
# Returns the inquiry hash ref, or nothing if the path cannot be opened or the
# inquiry fails.
sub path_is_scsi {
    my ($path) = @_;

    # pass the mode separately so that characters in $path can never be
    # interpreted as part of the open mode
    my $fh = IO::File->new($path, '+<') || return;
    my $res = scsi_inquiry($fh, 1);
    close($fh);

    return $res;
}
1450
# Build the QEMU -device string for the USB tablet. The tablet is attached to
# the 'ehci' bus on q35 machines and on aarch64, to 'uhci' otherwise.
sub print_tabletdevice_full {
    my ($conf, $arch) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # we use uhci for old VMs because tablet driver was buggy in older qemu
    my $usbbus = ($q35 || $arch eq 'aarch64') ? 'ehci' : 'uhci';

    return "usb-tablet,id=tablet,bus=$usbbus.0,port=1";
}
1466
# Build the QEMU -device string for the USB keyboard. Only aarch64 gets one;
# for any other architecture nothing is returned.
sub print_keyboarddevice_full {
    my ($conf, $arch) = @_;

    return "usb-kbd,id=keyboard,bus=ehci.0,port=2" if $arch eq 'aarch64';

    return;
}
1474
# File-private helper: drive ID is the interface name followed by the index.
my sub get_drive_id {
    my ($drive) = @_;

    my ($interface, $index) = ($drive->{interface}, $drive->{index});
    return "${interface}${index}";
}
1479
# Build the QEMU -device string for a drive.
# Handles the virtio, scsi, ide and sata interfaces; dies for 'usb'
# ("implement me") and for any other interface. bootindex and the URI-unescaped
# serial are appended when set on the drive.
sub print_drivedevice_full {
    my ($storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type) = @_;

    my $device = '';

    my $drive_id = get_drive_id($drive);
    if ($drive->{interface} eq 'virtio') {
        my $pciaddr = print_pci_addr("$drive_id", $bridges, $arch, $machine_type);
        $device = "virtio-blk-pci,drive=drive-$drive_id,id=${drive_id}${pciaddr}";
        $device .= ",iothread=iothread-$drive_id" if $drive->{iothread};
    } elsif ($drive->{interface} eq 'scsi') {

        my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = 'hd';
        my $path = '';
        if (drive_is_cdrom($drive)) {
            $devicetype = 'cd';
        } else {
            if ($drive->{file} =~ m|^/|) {
                $path = $drive->{file};
                # host paths: ask the device itself what it is
                if (my $info = path_is_scsi($path)) {
                    if ($info->{type} == 0 && $drive->{scsiblock}) {
                        $devicetype = 'block';
                    } elsif ($info->{type} == 1) { # tape
                        $devicetype = 'generic';
                    }
                }
            } else {
                $path = PVE::Storage::path($storecfg, $drive->{file});
            }

            # for compatibility only, we prefer scsi-hd (#2408, #2355, #2380)
            my $version = extract_version($machine_type, kvm_user_version());
            if ($path =~ m/^iscsi\:\/\// &&
                !min_version($version, 4, 1)) {
                $devicetype = 'generic';
            }
        }

        # lsi* (also the default when scsihw is unset) and pvscsi address the
        # disk by scsi-id, all other controllers by lun
        if (!$conf->{scsihw} || $conf->{scsihw} =~ m/^lsi/ || $conf->{scsihw} eq 'pvscsi') {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,scsi-id=$unit";
        } else {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,channel=0,scsi-id=0"
                .",lun=$drive->{index}";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($drive->{ssd} && ($devicetype eq 'block' || $devicetype eq 'hd')) {
            $device .= ",rotation_rate=1";
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};

    } elsif ($drive->{interface} eq 'ide' || $drive->{interface} eq 'sata') {
        my $maxdev = ($drive->{interface} eq 'sata') ? $PVE::QemuServer::Drive::MAX_SATA_DISKS : 2;
        my $controller = int($drive->{index} / $maxdev);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = ($drive->{media} && $drive->{media} eq 'cdrom') ? "cd" : "hd";

        $device = "ide-$devicetype";
        if ($drive->{interface} eq 'ide') {
            $device .= ",bus=ide.$controller,unit=$unit";
        } else {
            $device .= ",bus=ahci$controller.$unit";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($devicetype eq 'hd') {
            if (my $model = $drive->{model}) {
                $model = URI::Escape::uri_unescape($model);
                $device .= ",model=$model";
            }
            if ($drive->{ssd}) {
                $device .= ",rotation_rate=1";
            }
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};
    } elsif ($drive->{interface} eq 'usb') {
        die "implement me";
        # -device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0
    } else {
        die "unsupported interface type";
    }

    $device .= ",bootindex=$drive->{bootindex}" if $drive->{bootindex};

    if (my $serial = $drive->{serial}) {
        $serial = URI::Escape::uri_unescape($serial);
        $device .= ",serial=$serial";
    }

    return $device;
}
1575
# Read the first 'InitiatorName=<name>' entry from
# /etc/iscsi/initiatorname.iscsi. Returns nothing if the file cannot be opened
# and undef if it contains no matching line.
sub get_initiator_name {
    my $fh = IO::File->new('/etc/iscsi/initiatorname.iscsi') || return;

    my $initiator;
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\s*InitiatorName\s*=\s*([\.\-:\w]+)/) {
            $initiator = $1;
            last;
        }
    }
    $fh->close();

    return $initiator;
}
1589
# Build the QEMU -drive option string for a drive.
# $pbs_name, if set, names a PBS block node that backs the drive through an
# alloc-track layer; this is rejected for cdrom drives. $io_uring enables
# aio=io_uring as the default where it is not excluded below.
sub print_drive_commandline_full {
    my ($storecfg, $vmid, $drive, $pbs_name, $io_uring) = @_;

    my $path;
    my $volid = $drive->{file};
    my $format = $drive->{format};
    my $drive_id = get_drive_id($drive);

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
    my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;

    if (drive_is_cdrom($drive)) {
        $path = get_iso_path($storecfg, $vmid, $volid);
        die "$drive_id: cannot back cdrom drive with PBS snapshot\n" if $pbs_name;
    } else {
        if ($storeid) {
            $path = PVE::Storage::path($storecfg, $volid);
            $format //= qemu_img_format($scfg, $volname);
        } else {
            $path = $volid;
            $format //= "raw";
        }
    }

    # $path may be undefined for a cdrom drive without a usable host device
    my $is_rbd = ($path // '') =~ m/^rbd:/;

    my $opts = '';
    my @qemu_drive_options = qw(heads secs cyls trans media cache rerror werror aio discard);
    foreach my $o (@qemu_drive_options) {
        $opts .= ",$o=$drive->{$o}" if defined($drive->{$o});
    }

    # snapshot only accepts on|off
    if (defined($drive->{snapshot})) {
        my $v = $drive->{snapshot} ? 'on' : 'off';
        $opts .= ",snapshot=$v";
    }

    if (defined($drive->{ro})) { # ro maps to QEMUs `readonly`, which accepts `on` or `off` only
        $opts .= ",readonly=" . ($drive->{ro} ? 'on' : 'off');
    }

    # translate our mbps*/iops* limits into QEMU throttling.* options
    foreach my $type (['', '-total'], [_rd => '-read'], [_wr => '-write']) {
        my ($dir, $qmpname) = @$type;
        if (my $v = $drive->{"mbps$dir"}) {
            $opts .= ",throttling.bps$qmpname=".int($v*1024*1024);
        }
        if (my $v = $drive->{"mbps${dir}_max"}) {
            $opts .= ",throttling.bps$qmpname-max=".int($v*1024*1024);
        }
        if (my $v = $drive->{"bps${dir}_max_length"}) {
            $opts .= ",throttling.bps$qmpname-max-length=$v";
        }
        if (my $v = $drive->{"iops${dir}"}) {
            $opts .= ",throttling.iops$qmpname=$v";
        }
        if (my $v = $drive->{"iops${dir}_max"}) {
            $opts .= ",throttling.iops$qmpname-max=$v";
        }
        if (my $v = $drive->{"iops${dir}_max_length"}) {
            $opts .= ",throttling.iops$qmpname-max-length=$v";
        }
    }

    if ($pbs_name) {
        $format = "rbd" if $is_rbd;
        die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n"
            if !$format;
        $opts .= ",format=alloc-track,file.driver=$format";
    } elsif ($format) {
        $opts .= ",format=$format";
    }

    my $cache_direct = 0;

    if (my $cache = $drive->{cache}) {
        $cache_direct = $cache =~ /^(?:off|none|directsync)$/;
    } elsif (!drive_is_cdrom($drive) && !($scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow})) {
        $opts .= ",cache=none";
        $cache_direct = 1;
    }

    # io_uring with cache mode writeback or writethrough on krbd will hang...
    my $rbd_no_io_uring = $scfg && $scfg->{type} eq 'rbd' && $scfg->{krbd} && !$cache_direct;

    # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
    # sometimes, just plain disable...
    my $lvm_no_io_uring = $scfg && $scfg->{type} eq 'lvm';

    if (!$drive->{aio}) {
        if ($io_uring && !$rbd_no_io_uring && !$lvm_no_io_uring) {
            # io_uring supports all cache modes
            $opts .= ",aio=io_uring";
        } else {
            # aio native works only with O_DIRECT
            if ($cache_direct) {
                $opts .= ",aio=native";
            } else {
                $opts .= ",aio=threads";
            }
        }
    }

    if (!drive_is_cdrom($drive)) {
        my $detectzeroes;
        if (defined($drive->{detect_zeroes}) && !$drive->{detect_zeroes}) {
            $detectzeroes = 'off';
        } elsif ($drive->{discard}) {
            $detectzeroes = $drive->{discard} eq 'on' ? 'unmap' : 'on';
        } else {
            # This used to be our default with discard not being specified:
            $detectzeroes = 'on';
        }

        # note: 'detect-zeroes' works per blockdev and we want it to persist
        # after the alloc-track is removed, so put it on 'file' directly
        my $dz_param = $pbs_name ? "file.detect-zeroes" : "detect-zeroes";
        $opts .= ",$dz_param=$detectzeroes" if $detectzeroes;
    }

    if ($pbs_name) {
        $opts .= ",backing=$pbs_name";
        $opts .= ",auto-remove=on";
    }

    my $file_param = "file";
    if ($pbs_name) {
        # non-rbd drivers require the underlying file to be a separate block
        # node, so add a second .file indirection
        $file_param .= ".file" if !$is_rbd;
        $file_param .= ".filename";
    }
    my $pathinfo = $path ? "$file_param=$path," : '';

    return "${pathinfo}if=none,id=drive-${drive_id}${opts}";
}
1727
# Build the -blockdev option string for a read-only 'pbs' driver node named
# $pbs_name from the repository, snapshot, archive and (optional) keyfile
# entries of $pbs_conf.
sub print_pbs_blockdev {
    my ($pbs_conf, $pbs_name) = @_;

    my @props = (
        "driver=pbs",
        "node-name=$pbs_name",
        "read-only=on",
        "repository=$pbs_conf->{repository}",
        "snapshot=$pbs_conf->{snapshot}",
        "archive=$pbs_conf->{archive}",
    );
    push @props, "keyfile=$pbs_conf->{keyfile}" if $pbs_conf->{keyfile};

    return join(',', @props);
}
1737
# Build the QEMU -device string for a network card.
# The 'virtio' model maps to 'virtio-net-pci'; every other model name is used
# as the device name directly. Dies if a VirtIO MTU is below 576 or above the
# bridge MTU; an MTU on a non-VirtIO or bridge-less NIC only triggers a warning.
sub print_netdevice_full {
    my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type) = @_;

    my $is_virtio = $net->{model} eq 'virtio';
    my $device = $is_virtio ? 'virtio-net-pci' : $net->{model};

    my $pciaddr = print_pci_addr("$netid", $bridges, $arch, $machine_type);
    my $tmpstr = "$device,mac=$net->{macaddr},netdev=$netid$pciaddr,id=$netid";

    if ($net->{queues} && $net->{queues} > 1 && $is_virtio) {
        # Consider we have N queues, the number of vectors needed is 2 * N + 2, i.e., one per in
        # and out of each queue plus one config interrupt and control vector queue
        my $vectors = $net->{queues} * 2 + 2;
        $tmpstr .= ",vectors=$vectors,mq=on";
    }

    if ($net->{bootindex}) {
        $tmpstr .= ",bootindex=$net->{bootindex}";
    }

    if (my $mtu = $net->{mtu}) {
        if ($is_virtio && $net->{bridge}) {
            my $bridge_mtu = PVE::Network::read_bridge_mtu($net->{bridge});
            if ($mtu == 1) {
                # the special value 1 means: use the MTU of the bridge
                $mtu = $bridge_mtu;
            } elsif ($mtu < 576) {
                die "netdev $netid: MTU '$mtu' is smaller than the IP minimum MTU '576'\n";
            } elsif ($mtu > $bridge_mtu) {
                die "netdev $netid: MTU '$mtu' is bigger than the bridge MTU '$bridge_mtu'\n";
            }
            $tmpstr .= ",host_mtu=$mtu";
        } else {
            warn "WARN: netdev $netid: ignoring MTU '$mtu', not using VirtIO or no bridge configured.\n";
        }
    }

    if ($use_old_bios_files) {
        # PXE ROM file per device model
        my %romfile_for = (
            'virtio-net-pci' => 'pxe-virtio.rom',
            'e1000' => 'pxe-e1000.rom',
            'e1000e' => 'pxe-e1000e.rom',
            'ne2k' => 'pxe-ne2k_pci.rom',
            'pcnet' => 'pxe-pcnet.rom',
            'rtl8139' => 'pxe-rtl8139.rom',
        );
        my $romfile = $romfile_for{$device};
        $tmpstr .= ",romfile=$romfile" if $romfile;
    }

    return $tmpstr;
}
1792
# Build the QEMU -netdev string for the network device $netid ('netN').
# With a bridge configured a tap device named tap<vmid>i<N> is used together
# with the pve-bridge scripts (hotplug variant if $hotplug is set); otherwise a
# user-mode netdev is created. Dies if $netid is not of the form 'netN', if N
# is not below $MAX_NETS, or if the tap interface name gets too long.
sub print_netdev_full {
    my ($vmid, $conf, $arch, $net, $netid, $hotplug) = @_;

    # reject malformed ids explicitly; previously they slipped through the
    # range check below with an empty index
    my ($i) = $netid =~ m/^net(\d+)$/
        or die "got strange net id '$netid'\n";
    $i = int($i);

    die "got strange net id '$i'\n" if $i >= ${MAX_NETS};

    my $ifname = "tap${vmid}i$i";

    # kvm uses TUNSETIFF ioctl, and that limits ifname length
    die "interface name '$ifname' is too long (max 15 character)\n"
        if length($ifname) >= 16;

    my $vhostparam = '';
    if (is_native($arch)) {
        $vhostparam = ',vhost=on' if kernel_has_vhost_net() && $net->{model} eq 'virtio';
    }

    my $vmname = $conf->{name} || "vm$vmid";

    my $netdev = "";
    my $script = $hotplug ? "pve-bridge-hotplug" : "pve-bridge";

    if ($net->{bridge}) {
        $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/$script"
            .",downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam";
    } else {
        $netdev = "type=user,id=$netid,hostname=$vmname";
    }

    $netdev .= ",queues=$net->{queues}" if ($net->{queues} && $net->{model} eq 'virtio');

    return $netdev;
}
1830
# maps our vga type names to QEMU display device names
my $vga_map = {
    'cirrus' => 'cirrus-vga',
    'std' => 'VGA',
    'vmware' => 'vmware-svga',
    'virtio' => 'virtio-vga',
};

# Build the QEMU -device string for a display adapter.
# $id is the index of an additional display (undef for the primary one) and
# $qxlnum is true when QXL devices are used. Dies if no device type can be
# determined for $vga->{type}.
sub print_vga_device {
    my ($conf, $vga, $arch, $machine_version, $machine, $id, $qxlnum, $bridges) = @_;

    my $type = $vga_map->{$vga->{type}};
    if ($arch eq 'aarch64' && defined($type) && $type eq 'virtio-vga') {
        $type = 'virtio-gpu';
    }
    my $vgamem_mb = $vga->{memory};

    my $max_outputs = '';
    if ($qxlnum) {
        $type = $id ? 'qxl' : 'qxl-vga';

        # both alternatives must be anchored, so group them under ^...$
        if (!$conf->{ostype} || $conf->{ostype} =~ m/^(?:l\d\d|other)$/) {
            # set max outputs so linux can have up to 4 qxl displays with one device
            if (min_version($machine_version, 4, 1)) {
                $max_outputs = ",max_outputs=4";
            }
        }
    }

    die "no devicetype for $vga->{type}\n" if !$type;

    my $memory = "";
    if ($vgamem_mb) {
        if ($vga->{type} eq 'virtio') {
            my $bytes = PVE::Tools::convert_size($vgamem_mb, "mb" => "b");
            $memory = ",max_hostmem=$bytes";
        } elsif ($qxlnum) {
            # from https://www.spice-space.org/multiple-monitors.html
            $memory = ",vgamem_mb=$vgamem_mb";
            my $ram = $vgamem_mb * 4;
            my $vram = $vgamem_mb * 2;
            $memory .= ",ram_size_mb=$ram,vram_size_mb=$vram";
        } else {
            $memory = ",vgamem_mb=$vgamem_mb";
        }
    } elsif ($qxlnum && $id) {
        $memory = ",ram_size=67108864,vram_size=33554432";
    }

    # EDID is switched off for the standard VGA device on Windows guests
    # unless the VM uses OVMF
    my $edidoff = "";
    if ($type eq 'VGA' && windows_version($conf->{ostype})) {
        $edidoff=",edid=off" if (!defined($conf->{bios}) || $conf->{bios} ne 'ovmf');
    }

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $vgaid = "vga" . ($id // '');
    my $pciaddr;
    if ($q35 && $vgaid eq 'vga') {
        # the first display uses pcie.0 bus on q35 machines
        $pciaddr = print_pcie_addr($vgaid, $bridges, $arch, $machine);
    } else {
        $pciaddr = print_pci_addr($vgaid, $bridges, $arch, $machine);
    }

    return "$type,id=${vgaid}${memory}${max_outputs}${pciaddr}${edidoff}";
}
1896
# Parse a ';' separated list of numbers and ranges ('N' or 'N-M', optionally
# surrounded by whitespace) into an array ref of [ start, end ] pairs; end is
# undef for single numbers. Dies on malformed parts and on descending ranges.
sub parse_number_sets {
    my ($set) = @_;

    my @ranges;
    for my $part (split(/;/, $set)) {
        my ($start, $end) = $part =~ /^\s*(\d+)(?:-(\d+))?\s*$/
            or die "invalid range: $part\n";

        die "invalid range: $part ($end < $start)\n" if defined($end) && $end < $start;

        push @ranges, [ $start, $end ];
    }

    return \@ranges;
}
1910
# Parse a numa property string using $numa_fmt and expand the 'cpus' and
# 'hostnodes' values, when present, with parse_number_sets().
sub parse_numa {
    my ($data) = @_;

    my $res = parse_property_string($numa_fmt, $data);

    for my $key (qw(cpus hostnodes)) {
        next if !defined($res->{$key});
        $res->{$key} = parse_number_sets($res->{$key});
    }

    return $res;
}
1919
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
#
# Parse a netX property string using $net_fmt. On a parse error the error is
# emitted as a warning and nothing is returned. If no MAC address was given, a
# random one is generated using the datacenter.cfg 'mac_prefix'.
sub parse_net {
    my ($data) = @_;

    my $net = eval { parse_property_string($net_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    return $net if defined($net->{macaddr});

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
    $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix});

    return $net;
}
1935
# ipconfigX ip=cidr,gw=ip,ip6=cidr,gw6=ip
#
# Parse an ipconfigX property string using $ipconfig_fmt. Parse errors and
# inconsistent gateway/address combinations are emitted as warnings and nothing
# is returned. If neither an IPv4 nor an IPv6 address is set, DHCP is returned
# for both.
sub parse_ipconfig {
    my ($data) = @_;

    my $res = eval { parse_property_string($ipconfig_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    # find the first consistency problem, if any
    my $problem;
    if ($res->{gw} && !$res->{ip}) {
        $problem = 'gateway specified without specifying an IP address';
    } elsif ($res->{gw6} && !$res->{ip6}) {
        $problem = 'IPv6 gateway specified without specifying an IPv6 address';
    } elsif ($res->{gw} && $res->{ip} eq 'dhcp') {
        $problem = 'gateway specified together with DHCP';
    } elsif ($res->{gw6} && $res->{ip6} !~ /^$IPV6RE/) {
        # gw6 + auto/dhcp
        $problem = "IPv6 gateway specified together with $res->{ip6} address";
    }
    if (defined($problem)) {
        warn $problem;
        return;
    }

    return { ip => 'dhcp', ip6 => 'dhcp' } if !$res->{ip} && !$res->{ip6};

    return $res;
}
1970
# Render a parsed network device hash as a netX property string ($net_fmt).
sub print_net {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $net_fmt);
}
1976
# Re-render every netN entry of $settings through parse_net()/print_net(),
# which fills in a MAC address where none was given. Entries that fail to
# parse are left untouched.
sub add_random_macs {
    my ($settings) = @_;

    for my $opt (grep { m/^net(\d+)$/ } keys %$settings) {
        my $net = parse_net($settings->{$opt}) or next;
        $settings->{$opt} = print_net($net);
    }
}
1987
# Return 1 if PVE::Storage::path() reports $vmid as owner of $volid.
# Absolute paths and volumes whose lookup fails are never owned (nothing is
# returned).
sub vm_is_volid_owner {
    my ($storecfg, $vmid, $volid) = @_;

    return if $volid =~ m|^/|;

    my ($path, $owner);
    # lookup errors are deliberately ignored; $owner then stays undefined
    eval { ($path, $owner) = PVE::Storage::path($storecfg, $volid); };

    return 1 if $owner && ($owner == $vmid);

    return;
}
2001
# Handle a drive that is no longer referenced by the config: cloud-init
# volumes are freed (errors only warn), cdrom drives are ignored, and any other
# volume owned by this VM is added to the config as unused volume.
sub vmconfig_register_unused_drive {
    my ($storecfg, $vmid, $conf, $drive) = @_;

    if (drive_is_cloudinit($drive)) {
        eval { PVE::Storage::vdisk_free($storecfg, $drive->{file}) };
        warn $@ if $@;
    } elsif (!drive_is_cdrom($drive)) {
        my $volid = $drive->{file};
        PVE::QemuConfig->add_unused_volume($conf, $volid, $vmid)
            if vm_is_volid_owner($storecfg, $vmid, $volid);
    }
}
2015
# smbios: [manufacturer=str][,product=str][,version=str][,serial=str][,uuid=uuid][,sku=str][,family=str][,base64=bool]
#
# All free-text fields share the same schema and differ only in their
# description, so they are generated from one template.
my $smbios1_fmt = do {
    my $text_field = sub {
        my ($description) = @_;
        return {
            type => 'string',
            pattern => '[A-Za-z0-9+\/]+={0,2}',
            format_description => 'Base64 encoded string',
            description => $description,
            optional => 1,
        };
    };

    +{
        uuid => {
            type => 'string',
            pattern => '[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}',
            format_description => 'UUID',
            description => "Set SMBIOS1 UUID.",
            optional => 1,
        },
        version => $text_field->("Set SMBIOS1 version."),
        serial => $text_field->("Set SMBIOS1 serial number."),
        manufacturer => $text_field->("Set SMBIOS1 manufacturer."),
        product => $text_field->("Set SMBIOS1 product ID."),
        sku => $text_field->("Set SMBIOS1 SKU string."),
        family => $text_field->("Set SMBIOS1 family string."),
        base64 => {
            type => 'boolean',
            description => 'Flag to indicate that the SMBIOS values are base64 encoded',
            optional => 1,
        },
    };
};
2073
# Parse an smbios1 property string according to $smbios1_fmt.
#
# Returns the parsed hash; on a parse error the error is emitted as a
# warning and undef is returned instead.
sub parse_smbios1 {
    my ($data) = @_;

    my $parsed = eval { parse_property_string($smbios1_fmt, $data) };
    if (my $err = $@) {
        warn $err;
    }

    return $parsed;
}
2081
# Serialize a parsed smbios1 hash back into its property-string form, using
# the $smbios1_fmt schema.
sub print_smbios1 {
    my ($smbios1) = @_;

    return PVE::JSONSchema::print_property_string($smbios1, $smbios1_fmt);
}
2086
# expose the smbios1 schema under the format name 'pve-qm-smbios1'
PVE::JSONSchema::register_format('pve-qm-smbios1', $smbios1_fmt);
2088
# Parse a watchdog property string according to $watchdog_fmt.
#
# Returns nothing for a false (unset/empty) $value. Otherwise returns the
# parsed hash, or undef after warning when the string cannot be parsed.
sub parse_watchdog {
    my ($value) = @_;

    return if !$value;

    my $parsed = eval { parse_property_string($watchdog_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $parsed;
}
2098
# Parse the 'agent' option of a VM config according to $agent_fmt.
#
# Always returns a hash reference: the parsed properties when the agent is
# enabled, and an empty hash when the option is unset, cannot be parsed (a
# warning is emitted in that case) or has 'enabled' set to a false value.
sub parse_guest_agent {
    my ($conf) = @_;

    my $agent_string = $conf->{agent};
    return {} if !defined($agent_string);

    my $parsed = eval { parse_property_string($agent_fmt, $agent_string) };
    warn $@ if $@;

    # a disabled agent hides all other properties that may be set
    return $parsed->{enabled} ? $parsed : {};
}
2111
# Look up a single property ($key) of the guest agent configuration.
#
# Returns undef when no 'agent' option is configured; otherwise the value
# found in the hash returned by parse_guest_agent() (undef if absent).
sub get_qga_key {
    my ($conf, $key) = @_;

    if (defined($conf->{agent})) {
        my $agent_conf = parse_guest_agent($conf);
        return $agent_conf->{$key};
    }

    # explicit undef: callers evaluating this in list context get one element
    return undef;
}
2119
# Parse a vga property string according to $vga_fmt.
#
# A false (unset/empty) $value yields an empty hash. Otherwise the parsed
# hash is returned, or undef after warning when the string cannot be parsed.
sub parse_vga {
    my ($value) = @_;

    return {} if !$value;

    my $parsed = eval { parse_property_string($vga_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $parsed;
}
2128
# Parse an rng property string according to $rng_fmt.
#
# Returns nothing for a false (unset/empty) $value. Otherwise returns the
# parsed hash, or undef after warning when the string cannot be parsed.
sub parse_rng {
    my ($value) = @_;

    return if !$value;

    my $parsed = eval { parse_property_string($rng_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $parsed;
}
2138
# Parse a 'meta' property string according to $meta_info_fmt.
#
# Returns nothing for a false (unset/empty) $value. Otherwise returns the
# parsed hash, or undef after warning when the string cannot be parsed.
sub parse_meta_info {
    my ($value) = @_;

    return if !$value;

    my $parsed = eval { parse_property_string($meta_info_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $parsed;
}
2148
# Build a fresh 'meta' property string holding the value returned by
# kvm_user_version() ('creation-qemu') and the current epoch time ('ctime',
# passed as a string).
sub new_meta_info_string {
    my () = @_; # for now do not allow to override any value

    my %meta = (
        'creation-qemu' => kvm_user_version(),
        ctime => "". int(time()),
    );

    return PVE::JSONSchema::print_property_string(\%meta, $meta_info_fmt);
}
2160
PVE::JSONSchema::register_format('pve-qm-usb-device', \&verify_usb_device);
# Format verifier registered as 'pve-qm-usb-device'.
#
# Returns $value unchanged when parse_usb_device() accepts it. On failure it
# returns nothing if $noerr is set and dies otherwise.
sub verify_usb_device {
    my ($value, $noerr) = @_;

    if (!parse_usb_device($value)) {
        return if $noerr;
        die "unable to parse usb device\n";
    }

    return $value;
}
2171
# add JSON properties for create and set function
#
# Copies every option description from $confdesc into $prop, except for the
# options listed below, and returns $prop.
sub json_config_properties {
    my ($prop) = @_;

    # options that are left out of the create/set schema
    my %skip_json_config_opts = map { $_ => 1 } qw(
        parent
        snaptime
        vmstate
        runningmachine
        runningcpu
        meta
    );

    for my $opt (grep { !$skip_json_config_opts{$_} } keys %$confdesc) {
        $prop->{$opt} = $confdesc->{$opt};
    }

    return $prop;
}
2192
# return copy of $confdesc_cloudinit to generate documentation
#
# A deep copy is handed out, so callers can modify the result without
# touching the schema itself.
sub cloudinit_config_properties {
    my $schema_copy = dclone($confdesc_cloudinit);

    return $schema_copy;
}
2198
# Validate $value against the type of config option $key as described in
# $confdesc and return the normalized value.
#
# Normalization per type:
#   boolean - 1/on/yes/true give 1, 0/off/no/false give 0 (case-insensitive)
#   integer - digits only, returned as number
#   number  - digits with an optional fractional part, returned unchanged
#   string  - checked with PVE::JSONSchema::check_format() when the option
#             declares a format; otherwise one pair of enclosing double
#             quotes is stripped
#
# Dies for unknown options, undefined values, values containing a CR or LF
# character, and values that do not fit the type.
sub check_type {
    my ($key, $value) = @_;

    my $desc = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$desc;

    my $type = $desc->{type};

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    if ($type eq 'string') {
        my $fmt = $desc->{format};
        if ($fmt) {
            PVE::JSONSchema::check_format($fmt, $value);
        } else {
            $value =~ s/^\"(.*)\"$/$1/;
        }
        return $value;
    }

    die "internal error"
}
2235
# Destroy the VM $vmid: free the volumes it owns and remove its config.
#
# Parameters:
#   $storecfg           - storage configuration handed to the PVE::Storage calls
#   $vmid               - ID of the VM to destroy
#   $skiplock           - when true, the config lock check is skipped
#   $replacement_conf   - when defined, this config is written for $vmid
#                         instead of destroying the config file
#   $purge_unreferenced - when true, additionally free every volume that
#                         vdisk_list() reports for $vmid with content type
#                         'images'
#
# Dies if the config is locked (unless $skiplock) or if a template's base
# volume is still in use. A failure to free an individual volume only
# produces a warning.
sub destroy_vm {
    my ($storecfg, $vmid, $skiplock, $replacement_conf, $purge_unreferenced) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    PVE::QemuConfig->check_lock($conf) if !$skiplock;

    if ($conf->{template}) {
        # check if any base image is still used by a linked clone
        PVE::QemuConfig->foreach_volume_full($conf, { include_unused => 1 }, sub {
            my ($ds, $drive) = @_;
            return if drive_is_cdrom($drive);

            my $volid = $drive->{file};
            # skip empty drives and absolute paths
            return if !$volid || $volid =~ m|^/|;

            die "base volume '$volid' is still in use by linked cloned\n"
                if PVE::Storage::volume_is_base_and_used($storecfg, $volid);

        });
    }

    # volumes that were already handled, so each one is freed at most once
    my $volids = {};
    # callback: free $drive's volume if it is owned by $vmid
    my $remove_owned_drive = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;
        return if $volids->{$volid};

        # not wrapped in eval: an error raised here aborts the whole destroy
        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        $volids->{$volid} = 1;
        eval { PVE::Storage::vdisk_free($storecfg, $volid) };
        warn "Could not remove disk '$volid', check manually: $@" if $@;
    };

    # only remove disks owned by this VM (referenced in the config)
    my $include_opts = {
        include_unused => 1,
        extra_keys => ['vmstate'],
    };
    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $remove_owned_drive);

    # saved VM state volumes referenced by snapshots
    for my $snap (values %{$conf->{snapshots}}) {
        next if !defined($snap->{vmstate});
        my $drive = PVE::QemuConfig->parse_volume('vmstate', $snap->{vmstate}, 1);
        next if !defined($drive);
        $remove_owned_drive->('vmstate', $drive);
    }

    # volumes that are only referenced by pending changes
    PVE::QemuConfig->foreach_volume_full($conf->{pending}, $include_opts, $remove_owned_drive);

    if ($purge_unreferenced) { # also remove unreferenced disk
        my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid, undef, 'images');
        PVE::Storage::foreach_volid($vmdisks, sub {
            my ($volid, $sid, $volname, $d) = @_;
            eval { PVE::Storage::vdisk_free($storecfg, $volid) };
            warn $@ if $@;
        });
    }

    # finally replace or remove the configuration itself
    if (defined $replacement_conf) {
        PVE::QemuConfig->write_config($vmid, $replacement_conf);
    } else {
        PVE::QemuConfig->destroy_config($vmid);
    }
}
2306
# Parse the raw content ($raw) of a VM configuration file.
#
# Parameters:
#   $filename - path of the config file; it must end in
#               '/qemu-server/<vmid>.conf', the VMID is taken from it
#   $raw      - file content; nothing is returned when it is undefined
#
# Returns a hash with the current configuration at top level plus:
#   digest    - SHA1 hex digest of $raw
#   pending   - options from the [PENDING] section
#   snapshots - one hash per '[<name>]' section, keyed by section name
#
# Lines starting with '#' and 'description:' lines are accumulated into the
# 'description' of the section they belong to. Lines that cannot be parsed or
# validated only produce a warning and are skipped.
#
# Dies if $filename does not match the expected pattern.
sub parse_vm_config {
    my ($filename, $raw) = @_;

    return if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
        pending => {},
    };

    $filename =~ m|/qemu-server/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    my $conf = $res; # section currently being filled
    my $descr;
    my $section = '';

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        if ($line =~ m/^\[PENDING\]\s*$/i) {
            $section = 'pending';
            # the description collected so far belongs to the previous section
            if (defined($descr)) {
                $descr =~ s/\s+$//;
                $conf->{description} = $descr;
            }
            $descr = undef;
            $conf = $res->{$section} = {};
            next;

        } elsif ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            $section = $1;
            if (defined($descr)) {
                $descr =~ s/\s+$//;
                $conf->{description} = $descr;
            }
            $descr = undef;
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)\s*$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # anchored at line start like every other option, so that a value
            # of another option which merely ends in 'snapstate: ...' is not
            # mistaken for the snapstate option
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            # 'args' is stored verbatim, without type checking
            my $key = $1;
            my $value = $2;
            $conf->{$key} = $value;
        } elsif ($line =~ m/^delete:\s*(.*\S)\s*$/) {
            my $value = $1;
            if ($section eq 'pending') {
                $conf->{delete} = $value; # we parse this later
            } else {
                warn "vm $vmid - propertry 'delete' is only allowed in [PENDING]\n";
            }
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(.+?)\s*$/) {
            my $key = $1;
            my $value = $2;
            eval { $value = check_type($key, $value); };
            if ($@) {
                warn "vm $vmid - unable to parse value of '$key' - $@";
            } else {
                $key = 'ide2' if $key eq 'cdrom';
                my $fmt = $confdesc->{$key}->{format};
                if ($fmt && $fmt =~ /^pve-qm-(?:ide|scsi|virtio|sata)$/) {
                    # normalize the drive's file name into a volume ID;
                    # a drive that cannot be parsed is skipped with a warning
                    my $v = parse_drive($key, $value);
                    my $volid = $v ? filename_to_volume_id($vmid, $v->{file}, $v->{media}) : undef;
                    if ($volid) {
                        $v->{file} = $volid;
                        $value = print_drive($v);
                    } else {
                        warn "vm $vmid - unable to parse value of '$key'\n";
                        next;
                    }
                }

                $conf->{$key} = $value;
            }
        } else {
            warn "vm $vmid - unable to parse config: $line\n";
        }
    }

    # description of the last section
    if (defined($descr)) {
        $descr =~ s/\s+$//;
        $conf->{description} = $descr;
    }
    delete $res->{snapstate}; # just to be sure

    return $res;
}
2409
# Serialize the VM configuration $conf into the raw config file format.
#
# Parameters:
#   $filename - not used by the code in this sub
#   $conf     - configuration hash; it is normalized IN PLACE (see below)
#
# Returns the raw file content: the current config first, then an optional
# [PENDING] section, then one '[<name>]' section per snapshot (sorted by
# name).
#
# Dies on conflicting cdrom/ide2 options, on values failing check_type(), on
# a 'delete' key outside the pending section and on a snapshot named
# 'pending' (compared case-insensitively).
sub write_vm_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    # 'cdrom' is stored as 'ide2'
    if ($conf->{cdrom}) {
        die "option ide2 conflicts with cdrom\n" if $conf->{ide2};
        $conf->{ide2} = $conf->{cdrom};
        delete $conf->{cdrom};
    }

    # we do not use 'smp' any longer
    if ($conf->{sockets}) {
        delete $conf->{smp};
    } elsif ($conf->{smp}) {
        $conf->{sockets} = $conf->{smp};
        delete $conf->{cores};
        delete $conf->{smp};
    }

    # volumes referenced by a drive in the current or the pending config
    my $used_volids = {};

    # validate all values of one config section in place and collect the
    # volumes used by its drives (snapshot sections are not considered)
    my $cleanup_config = sub {
        my ($cref, $pending, $snapname) = @_;

        foreach my $key (keys %$cref) {
            next if $key eq 'digest' || $key eq 'description' || $key eq 'snapshots' ||
                $key eq 'snapstate' || $key eq 'pending';
            my $value = $cref->{$key};
            if ($key eq 'delete') {
                die "propertry 'delete' is only allowed in [PENDING]\n"
                    if !$pending;
                # fixme: check syntax?
                next;
            }
            eval { $value = check_type($key, $value); };
            die "unable to parse value of '$key' - $@" if $@;

            $cref->{$key} = $value;

            if (!$snapname && is_valid_drivename($key)) {
                my $drive = parse_drive($key, $value);
                $used_volids->{$drive->{file}} = 1 if $drive && $drive->{file};
            }
        }
    };

    &$cleanup_config($conf);

    &$cleanup_config($conf->{pending}, 1);

    foreach my $snapname (keys %{$conf->{snapshots}}) {
        die "internal error: snapshot name '$snapname' is forbidden" if lc($snapname) eq 'pending';
        &$cleanup_config($conf->{snapshots}->{$snapname}, undef, $snapname);
    }

    # remove 'unusedX' settings if we re-add a volume
    foreach my $key (keys %$conf) {
        my $value = $conf->{$key};
        if ($key =~ m/^unused/ && $used_volids->{$value}) {
            delete $conf->{$key};
        }
    }

    # render one config section as text: the description as '#' comment
    # lines, followed by 'key: value' lines sorted by key
    my $generate_raw_config = sub {
        my ($conf, $pending) = @_;

        my $raw = '';

        # add description as comment to top of file
        if (defined(my $descr = $conf->{description})) {
            if ($descr) {
                foreach my $cl (split(/\n/, $descr)) {
                    $raw .= '#' . PVE::Tools::encode_text($cl) . "\n";
                }
            } else {
                # an empty description is only written out in the pending section
                $raw .= "#\n" if $pending;
            }
        }

        foreach my $key (sort keys %$conf) {
            next if $key =~ /^(digest|description|pending|snapshots)$/;
            $raw .= "$key: $conf->{$key}\n";
        }
        return $raw;
    };

    my $raw = &$generate_raw_config($conf);

    # the [PENDING] section is only written when it has entries
    if (scalar(keys %{$conf->{pending}})){
        $raw .= "\n[PENDING]\n";
        $raw .= &$generate_raw_config($conf->{pending}, 1);
    }

    foreach my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n";
        $raw .= &$generate_raw_config($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
2511
# Collect the static default values declared in the JSON schema ($confdesc).
#
# Returns a hash reference mapping each option that declares a defined
# 'default' to that default value.
sub load_defaults {
    my %defaults =
        map { $_ => $confdesc->{$_}->{default} }
        grep { defined($confdesc->{$_}->{default}) }
        keys %$confdesc;

    return \%defaults;
}
2525
# List the guests of type 'qemu' that the cluster VM list places on the node
# returned by nodename().
#
# Returns a hash reference of the form { $vmid => { exists => 1 }, ... }; it
# is empty when no VM list (or no 'ids' entry) is available.
sub config_list {
    my $vmlist = PVE::Cluster::get_vmlist();
    my $ids = $vmlist && $vmlist->{ids};
    return {} if !$ids;

    my $nodename = nodename();

    my %local_vms;
    for my $vmid (keys %$ids) {
        my $entry = $ids->{$vmid};

        my $is_local = $entry->{node} && $entry->{node} eq $nodename;
        my $is_qemu = $entry->{type} && $entry->{type} eq 'qemu';

        $local_vms{$vmid} = { exists => 1 } if $is_local && $is_qemu;
    }

    return \%local_vms;
}
2541
# test if VM uses local resources (to prevent migration)
#
# Returns an array reference with the names of all config options that bind
# the VM to the local node: the old-style 'hostusb'/'hostpci' options,
# 'ivshmem', and any usbN, hostpciN, serialN or parallelN entry. USB entries
# whose value starts with the 'spice' property and serial ports set to
# 'socket' do not count.
#
# Dies with "VM uses local resources" if anything was found and $noerr is
# not set.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    # 'hostusb' and 'hostpci' are the old syntax
    my @loc_res = grep { $conf->{$_} } qw(hostusb hostpci ivshmem);

    for my $key (keys %$conf) {
        my $value = $conf->{$key};

        my $is_spice_usb = $key =~ m/^usb/ && ($value =~ m/^spice(?![^,])/);
        # sockets are safe: they will be recreated on the target side post-migrate
        my $is_serial_socket = !$is_spice_usb && $key =~ m/^serial/ && ($value eq 'socket');
        next if $is_spice_usb || $is_serial_socket;

        push @loc_res, $key if $key =~ m/^(usb|hostpci|serial|parallel)\d+$/;
    }

    if (@loc_res && !$noerr) {
        die "VM uses local resources\n";
    }

    return \@loc_res;
}
2564
# check if used storages are available on all nodes (use by migrate)
#
# For every volume of $conf that lives on a storage, the storage must be
# enabled both for the default node of storage_check_enabled() and for
# $node, and it must allow the volume's content type. Dies on the first
# violation.
sub check_storage_availability {
    my ($storecfg, $conf, $node) = @_;

    my $assert_volume_available = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        # check if storage is available on both nodes
        my $scfg = PVE::Storage::storage_check_enabled($storecfg, $sid);
        PVE::Storage::storage_check_enabled($storecfg, $sid, $node);

        my ($vtype) = PVE::Storage::parse_volname($storecfg, $volid);
        return if $scfg->{content}->{$vtype};

        die "$volid: content type '$vtype' is not available on storage '$sid'\n";
    };

    PVE::QemuConfig->foreach_volume($conf, $assert_volume_available);
}
2588
# list nodes where all VM images are available (used by has_feature API)
#
# Starts with all cluster nodes and narrows the set down per volume:
#   - a disabled storage leaves no node at all
#   - a storage restricted to certain nodes keeps only those nodes
#   - a storage that is not shared keeps only the local node
# Returns a hash reference whose keys are the remaining node names.
sub shared_nodes {
    my ($conf, $storecfg) = @_;

    my %nodehash = map { $_ => 1 } @{ PVE::Cluster::get_nodelist() };
    my $nodename = nodename();

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        if ($scfg->{disable}) {
            %nodehash = ();
        } elsif (my $avail = $scfg->{nodes}) {
            delete @nodehash{ grep { !$avail->{$_} } keys %nodehash };
        } elsif (!$scfg->{shared}) {
            delete @nodehash{ grep { $_ ne $nodename } keys %nodehash };
        }
    });

    return \%nodehash;
}
2622
# Determine, per cluster node, which storages used by the VM's volumes are
# not available there.
#
# Returns a hash reference keyed by node name. The entry of a node is an
# empty hash when everything is available, otherwise it carries
# 'unavailable_storages' with the sorted list of storage IDs that are either
# disabled or restricted to other nodes.
sub check_local_storage_availability {
    my ($conf, $storecfg) = @_;

    my $nodehash = { map { $_ => {} } @{ PVE::Cluster::get_nodelist() } };

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        # nodes on which this storage cannot be used
        my @affected_nodes;
        if ($scfg->{disable}) {
            @affected_nodes = keys %$nodehash;
        } elsif (my $avail = $scfg->{nodes}) {
            @affected_nodes = grep { !$avail->{$_} } keys %$nodehash;
        }

        for my $node (@affected_nodes) {
            $nodehash->{$node}->{unavailable_storages}->{$storeid} = 1;
        }
    });

    # turn the collected storage sets into sorted lists
    for my $node_info (values %$nodehash) {
        my $unavail = $node_info->{unavailable_storages};
        next if !$unavail;

        $node_info->{unavailable_storages} = [ sort keys %$unavail ];
    }

    return $nodehash;
}
2661
# Compat only, use assert_config_exists_on_node and vm_running_locally where possible
#
# Unless $nocheck is set, first asserts that the config of $vmid exists on
# $node. Returns whatever vm_running_locally() reports for $vmid.
sub check_running {
    my ($vmid, $nocheck, $node) = @_;

    if (!$nocheck) {
        PVE::QemuConfig::assert_config_exists_on_node($vmid, $node);
    }

    return PVE::QemuServer::Helpers::vm_running_locally($vmid);
}
2669
# Return the VM list from config_list(), with a 'pid' entry added for every
# VM that has a '<vmid>.pid' file in the runtime directory and for which
# check_running() returns a true value.
#
# If the runtime directory cannot be opened, the plain config list is
# returned.
sub vzlist {
    my $vzlist = config_list();

    my $run_dir = IO::Dir->new($PVE::QemuServer::Helpers::var_run_tmpdir);
    return $vzlist if !$run_dir;

    while (defined(my $entry = $run_dir->read)) {
        next if $entry !~ m/^(\d+)\.pid$/;
        my $vmid = $1;

        # ignore pid files of VMs that are not in the config list
        next if !defined($vzlist->{$vmid});

        my $pid = check_running($vmid);
        next if !$pid;

        $vzlist->{$vmid}->{pid} = $pid;
    }

    return $vzlist;
}
2687
# Schema describing the per-VM status properties. Declared with 'our', so it
# is reachable from other packages. Only 'vmid' and 'status' are mandatory,
# every other property is optional.
our $vmstatus_return_properties = {
    vmid => get_standard_option('pve-vmid'),
    status => {
        description => "Qemu process status.",
        type => 'string',
        enum => ['stopped', 'running'],
    },
    maxmem => {
        description => "Maximum memory in bytes.",
        type => 'integer',
        optional => 1,
        renderer => 'bytes',
    },
    maxdisk => {
        description => "Root disk size in bytes.",
        type => 'integer',
        optional => 1,
        renderer => 'bytes',
    },
    name => {
        description => "VM name.",
        type => 'string',
        optional => 1,
    },
    qmpstatus => {
        description => "Qemu QMP agent status.",
        type => 'string',
        optional => 1,
    },
    pid => {
        description => "PID of running qemu process.",
        type => 'integer',
        optional => 1,
    },
    uptime => {
        description => "Uptime.",
        type => 'integer',
        optional => 1,
        renderer => 'duration',
    },
    cpus => {
        description => "Maximum usable CPUs.",
        type => 'number',
        optional => 1,
    },
    lock => {
        description => "The current config lock, if any.",
        type => 'string',
        optional => 1,
    },
    tags => {
        description => "The current configured tags, if any",
        type => 'string',
        optional => 1,
    },
    'running-machine' => {
        description => "The currently running machine type (if running).",
        type => 'string',
        optional => 1,
    },
    'running-qemu' => {
        description => "The currently running QEMU version (if running).",
        type => 'string',
        optional => 1,
    },
};
2754
# CPU accounting of the previous vmstatus() call, keyed by PID; it is needed
# to turn the cumulative CPU time counters into a usage ratio
my $last_proc_pid_stat;

# get VM status information
# This must be fast and should not block ($full == false)
# We only query KVM using QMP if $full == true (this can be slow)
#
# Parameters:
#   $opt_vmid - optional; restrict the result to this VMID
#   $full     - when true, additionally query the running VMs via QMP
#
# Returns a hash reference keyed by VMID; each entry is a status hash (vmid,
# status, pid, cpus, maxmem, disk and network counters, ...).
sub vmstatus {
    my ($opt_vmid, $full) = @_;

    my $res = {};

    my $storecfg = PVE::Storage::config();

    my $list = vzlist();
    my $defaults = load_defaults();

    my ($uptime) = PVE::ProcFSTools::read_proc_uptime(1);

    my $cpucount = $cpuinfo->{cpus} || 1;

    # pass 1: static information taken from the configuration
    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);

        my $conf = PVE::QemuConfig->load_config($vmid);

        my $d = { vmid => int($vmid) };
        $d->{pid} = int($list->{$vmid}->{pid}) if $list->{$vmid}->{pid};

        # fixme: better status?
        $d->{status} = $list->{$vmid}->{pid} ? 'running' : 'stopped';

        my $size = PVE::QemuServer::Drive::bootdisk_size($storecfg, $conf);
        if (defined($size)) {
            $d->{disk} = 0; # no info available
            $d->{maxdisk} = $size;
        } else {
            $d->{disk} = 0;
            $d->{maxdisk} = 0;
        }

        # sockets * cores, capped at the host CPU count; an explicit 'vcpus'
        # setting overrides the computed value
        $d->{cpus} = ($conf->{sockets} || $defaults->{sockets})
            * ($conf->{cores} || $defaults->{cores});
        $d->{cpus} = $cpucount if $d->{cpus} > $cpucount;
        $d->{cpus} = $conf->{vcpus} if $conf->{vcpus};

        $d->{name} = $conf->{name} || "VM $vmid";
        # the configured value is multiplied by 1024*1024 to get bytes
        $d->{maxmem} = $conf->{memory} ? $conf->{memory}*(1024*1024)
            : $defaults->{memory}*(1024*1024);

        if ($conf->{balloon}) {
            $d->{balloon_min} = $conf->{balloon}*(1024*1024);
            $d->{shares} = defined($conf->{shares}) ? $conf->{shares}
                : $defaults->{shares};
        }

        $d->{uptime} = 0;
        $d->{cpu} = 0;
        $d->{mem} = 0;

        $d->{netout} = 0;
        $d->{netin} = 0;

        $d->{diskread} = 0;
        $d->{diskwrite} = 0;

        $d->{template} = 1 if PVE::QemuConfig->is_template($conf);

        $d->{serial} = 1 if conf_has_serial($conf);
        $d->{lock} = $conf->{lock} if $conf->{lock};
        $d->{tags} = $conf->{tags} if defined($conf->{tags});

        $res->{$vmid} = $d;
    }

    # pass 2: network counters of the tap devices, matched to the VM by the
    # VMID embedded in the device name (tap<vmid>i...)
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    foreach my $dev (keys %$netdev) {
        next if $dev !~ m/^tap([1-9]\d*)i/;
        my $vmid = $1;
        my $d = $res->{$vmid};
        next if !$d;

        # the device's 'receive' counter is accounted as 'netout' and its
        # 'transmit' counter as 'netin' - presumably because the tap device
        # is the host-side end of the guest NIC (TODO confirm)
        $d->{netout} += $netdev->{$dev}->{receive};
        $d->{netin} += $netdev->{$dev}->{transmit};

        if ($full) {
            $d->{nics}->{$dev}->{netout} = int($netdev->{$dev}->{receive});
            $d->{nics}->{$dev}->{netin} = int($netdev->{$dev}->{transmit});
        }

    }

    my $ctime = gettimeofday;

    # pass 3: uptime, memory and CPU usage of running VMs from /proc
    foreach my $vmid (keys %$list) {

        my $d = $res->{$vmid};
        my $pid = $d->{pid};
        next if !$pid;

        my $pstat = PVE::ProcFSTools::read_proc_pid_stat($pid);
        next if !$pstat; # not running

        my $used = $pstat->{utime} + $pstat->{stime};

        $d->{uptime} = int(($uptime - $pstat->{starttime})/$cpuinfo->{user_hz});

        # memory estimate: the rss/vsize ratio of the process applied to the
        # configured maximum memory
        if ($pstat->{vsize}) {
            $d->{mem} = int(($pstat->{rss}/$pstat->{vsize})*$d->{maxmem});
        }

        my $old = $last_proc_pid_stat->{$pid};
        if (!$old) {
            # first sample for this PID: only remember it, usage stays 0
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($ctime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        # only recompute when enough time has passed since the last sample,
        # otherwise report the previously computed value again
        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => $d->{cpu},
            };
        } else {
            $d->{cpu} = $old->{cpu};
        }
    }

    return $res if !$full;

    # everything below is only done for $full and talks to the VMs via QMP
    my $qmpclient = PVE::QMPClient->new();

    # callback for 'query-balloon': memory information
    my $ballooncb = sub {
        my ($vmid, $resp) = @_;

        my $info = $resp->{'return'};
        return if !$info->{max_mem};

        my $d = $res->{$vmid};

        # use memory assigned to VM
        $d->{maxmem} = $info->{max_mem};
        $d->{balloon} = $info->{actual};

        if (defined($info->{total_mem}) && defined($info->{free_mem})) {
            $d->{mem} = $info->{total_mem} - $info->{free_mem};
            $d->{freemem} = $info->{free_mem};
        }

        $d->{ballooninfo} = $info;
    };

    # callback for 'query-blockstats': per-device stats plus read/write totals
    my $blockstatscb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} || [];
        my $totalrdbytes = 0;
        my $totalwrbytes = 0;

        for my $blockstat (@$data) {
            $totalrdbytes = $totalrdbytes + $blockstat->{stats}->{rd_bytes};
            $totalwrbytes = $totalwrbytes + $blockstat->{stats}->{wr_bytes};

            # remove the first 'drive-' from the device name
            $blockstat->{device} =~ s/drive-//;
            $res->{$vmid}->{blockstat}->{$blockstat->{device}} = $blockstat->{stats};
        }
        $res->{$vmid}->{diskread} = $totalrdbytes;
        $res->{$vmid}->{diskwrite} = $totalwrbytes;
    };

    # callback for 'query-machines': the machine type currently in use
    my $machinecb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} || [];

        $res->{$vmid}->{'running-machine'} =
            PVE::QemuServer::Machine::current_from_query_machines($data);
    };

    # callback for 'query-version': version of the running QEMU binary
    my $versioncb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} // {};
        my $version = 'unknown';

        if (my $v = $data->{qemu}) {
            $version = $v->{major} . "." . $v->{minor} . "." . $v->{micro};
        }

        $res->{$vmid}->{'running-qemu'} = $version;
    };

    # callback for 'query-status': queues the follow-up queries and records
    # the QMP status
    my $statuscb = sub {
        my ($vmid, $resp) = @_;

        $qmpclient->queue_cmd($vmid, $blockstatscb, 'query-blockstats');
        $qmpclient->queue_cmd($vmid, $machinecb, 'query-machines');
        $qmpclient->queue_cmd($vmid, $versioncb, 'query-version');
        # this fails if the balloon driver is not loaded, so this must be
        # the last command (following commands are aborted if this fails).
        $qmpclient->queue_cmd($vmid, $ballooncb, 'query-balloon');

        my $status = 'unknown';
        if (!defined($status = $resp->{'return'}->{status})) {
            warn "unable to get VM status\n";
            return;
        }

        $res->{$vmid}->{qmpstatus} = $resp->{'return'}->{status};
    };

    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        next if !$res->{$vmid}->{pid}; # not running
        $qmpclient->queue_cmd($vmid, $statuscb, 'query-status');
    }

    $qmpclient->queue_execute(undef, 2);

    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        next if !$res->{$vmid}->{pid}; #not running

        # we can't use the $qmpclient since it might have already aborted on
        # 'query-balloon', but this might also fail for older versions...
        my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };
        $res->{$vmid}->{'proxmox-support'} = $qemu_support // {};
    }

    # fall back to the process status where no QMP status was obtained
    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        $res->{$vmid}->{qmpstatus} = $res->{$vmid}->{status} if !$res->{$vmid}->{qmpstatus};
    }

    return $res;
}
2995
# Return 1 if any of the serial0 .. serial<$MAX_SERIAL_PORTS - 1> options is
# set to a true value in $conf, 0 otherwise.
sub conf_has_serial {
    my ($conf) = @_;

    for my $i (0 .. $MAX_SERIAL_PORTS - 1) {
        return 1 if $conf->{"serial$i"};
    }

    return 0;
}
3007
# Describe the audio device 'audio$id' of $conf ($id defaults to 0).
#
# Returns nothing when the option is not set. Otherwise returns a hash with
# the device model ('dev'), its ID ('dev_id'), the backend driver ('backend',
# 'spice' unless configured) and the backend ID ('backend_id'). Dies if the
# option's property string cannot be parsed.
sub conf_has_audio {
    my ($conf, $id) = @_;

    $id //= 0;

    my $audio = $conf->{"audio$id"};
    return if !defined($audio);

    my $props = parse_property_string($audio_fmt, $audio);
    my $audiodriver = $props->{driver} // 'spice';

    my %audio_info = (
        dev => $props->{device},
        dev_id => "audiodev$id",
        backend => $audiodriver,
        backend_id => "$audiodriver-backend${id}",
    );

    return \%audio_info;
}
3025
# Build the QEMU command line arguments for an audio device.
#
# Parameters:
#   $audio           - hash with 'dev', 'dev_id', 'backend' and 'backend_id'
#   $audiopciaddr    - string appended verbatim to the sound card's
#                      '-device' argument
#   $machine_version - machine version; from 4.2 on, the devices reference
#                      the backend via 'audiodev='
#
# Returns an array reference of arguments: the '-device' entries followed by
# the '-audiodev' backend definition. Dies for device models other than
# 'AC97' and those ending in 'intel-hda'.
sub audio_devs {
    my ($audio, $audiopciaddr, $machine_version) = @_;

    my $id = $audio->{dev_id};
    my $audiodev = min_version($machine_version, 4, 2)
        ? ",audiodev=$audio->{backend_id}"
        : "";

    my @devs;
    if ($audio->{dev} eq 'AC97') {
        @devs = ('-device', "AC97,id=${id}${audiopciaddr}$audiodev");
    } elsif ($audio->{dev} =~ /intel\-hda$/) {
        # the HDA controller plus two codecs attached to its bus
        @devs = (
            '-device', "$audio->{dev},id=${id}${audiopciaddr}",
            '-device', "hda-micro,id=${id}-codec0,bus=${id}.0,cad=0$audiodev",
            '-device', "hda-duplex,id=${id}-codec1,bus=${id}.0,cad=1$audiodev",
        );
    } else {
        die "unkown audio device '$audio->{dev}', implement me!";
    }

    push @devs, '-audiodev', "$audio->{backend},id=$audio->{backend_id}";

    return \@devs;
}
3051
# Return the runtime file paths used for the swtpm instance of VM $vmid: the
# control 'socket' and the 'pid' file.
sub get_tpm_paths {
    my ($vmid) = @_;

    my %paths = (
        socket => "/var/run/qemu-server/$vmid.swtpm",
        pid => "/var/run/qemu-server/$vmid.swtpm.pid",
    );

    return \%paths;
}
3059
# Append the QEMU arguments for the emulated TPM to @$devices: a chardev
# connected to the swtpm socket, the emulator tpmdev on top of it and the
# tpm-tis device. Does nothing when $conf has no 'tpmstate0' option.
sub add_tpm_device {
    my ($vmid, $devices, $conf) = @_;

    return if !$conf->{tpmstate0};

    my $paths = get_tpm_paths($vmid);

    push @$devices,
        "-chardev", "socket,id=tpmchar,path=$paths->{socket}",
        "-tpmdev", "emulator,id=tpmdev,chardev=tpmchar",
        "-device", "tpm-tis,tpmdev=tpmdev";
}
3071
# Start the swtpm daemon providing the emulated TPM of VM $vmid.
#
# Parameters:
#   $storecfg  - storage configuration, used to map the state volume
#   $vmid      - VM ID, determines the socket and pid file paths
#   $tpmdrive  - value of the 'tpmstate0' option; nothing is done (and
#                nothing returned) when it is false
#   $migration - when true, swtpm_setup is skipped
#
# Returns the untainted PID of the swtpm daemon.
#
# Dies if the drive cannot be parsed, a command fails, the pid file does not
# show up within about 5 seconds, or no PID can be read from it.
sub start_swtpm {
    my ($storecfg, $vmid, $tpmdrive, $migration) = @_;

    return if !$tpmdrive;

    my $tpm = parse_drive("tpmstate0", $tpmdrive);
    die "unable to parse tpmstate0 drive '$tpmdrive'\n" if !$tpm;

    # storage-managed volumes need to be mapped, anything else is used as is
    my $state;
    my ($storeid, $volname) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
    if ($storeid) {
        $state = PVE::Storage::map_volume($storecfg, $tpm->{file});
    } else {
        $state = $tpm->{file};
    }

    my $paths = get_tpm_paths($vmid);

    # during migration, we will get state from remote
    #
    if (!$migration) {
        # run swtpm_setup to create a new TPM state if it doesn't exist yet
        my $setup_cmd = [
            "swtpm_setup",
            "--tpmstate",
            "file://$state",
            "--createek",
            "--create-ek-cert",
            "--create-platform-cert",
            "--lock-nvram",
            "--config",
            "/etc/swtpm_setup.conf", # do not use XDG configs
            "--runas",
            "0", # force creation as root, error if not possible
            "--not-overwrite", # ignore existing state, do not modify
        ];

        push @$setup_cmd, "--tpm2" if $tpm->{version} eq 'v2.0';
        # TPM 2.0 supports ECC crypto, use if possible
        push @$setup_cmd, "--ecc" if $tpm->{version} eq 'v2.0';

        # use the line handed to the callback instead of relying on a regex
        # capture variable leaking in from run_command's internals
        run_command($setup_cmd, outfunc => sub {
            my ($line) = @_;
            print "swtpm_setup: $line\n";
        });
    }

    my $emulator_cmd = [
        "swtpm",
        "socket",
        "--tpmstate",
        "backend-uri=file://$state,mode=0600",
        "--ctrl",
        "type=unixio,path=$paths->{socket},mode=0600",
        "--pid",
        "file=$paths->{pid}",
        "--terminate", # terminate on QEMU disconnect
        "--daemon",
    ];
    push @$emulator_cmd, "--tpm2" if $tpm->{version} eq 'v2.0';
    # NOTE(review): assumes run_command passes each line without its line
    # terminator, hence the explicit newline - confirm against PVE::Tools
    run_command($emulator_cmd, outfunc => sub {
        my ($line) = @_;
        print "$line\n";
    });

    my $tries = 100; # swtpm may take a bit to start before daemonizing, wait up to 5s for pid
    while (! -e $paths->{pid}) {
        die "failed to start swtpm: pid file '$paths->{pid}' wasn't created.\n" if --$tries == 0;
        usleep(50_000);
    }

    # return untainted PID of swtpm daemon so it can be killed on error;
    # never fall back to a stale capture if the file holds no PID
    my $pid_line = file_read_firstline($paths->{pid}) // '';
    my ($pid) = $pid_line =~ m/(\d+)/;
    die "failed to start swtpm: no PID found in pid file '$paths->{pid}'\n" if !defined($pid);

    return $pid;
}
3141
# Check whether the vga setting $vga selects a QXL (SPICE) display.
#
# Returns 0 unless the vga type is 'qxl', 'qxl2', 'qxl3' or 'qxl4'.
# Otherwise returns the digit that follows 'qxl', or 1 for plain 'qxl'.
sub vga_conf_has_spice {
    my ($vga) = @_;

    my $vgaconf = parse_vga($vga);
    my $vgatype = $vgaconf->{type};

    if ($vgatype && $vgatype =~ m/^qxl([234])?$/) {
        return $1 || 1;
    }

    return 0;
}
3151
# Return true if $arch equals the architecture reported by get_host_arch().
sub is_native($) {
    my ($arch) = @_;

    my $host_arch = get_host_arch();
    return $host_arch eq $arch;
}
3156
# Return the architecture of a VM: the 'arch' option of $conf when it is
# defined, otherwise the result of get_host_arch().
sub get_vm_arch {
    my ($conf) = @_;

    my $arch = $conf->{arch};
    return $arch if defined($arch);

    return get_host_arch();
}
3161
# default machine type per guest architecture; get_vm_machine() falls back to
# this when neither a forced nor a configured machine is given
my $default_machines = {
    x86_64 => 'pc',
    aarch64 => 'virt',
};
3166
# Extract the '<major>.<minor>' prefix from a QEMU version string.
#
# Parameters:
#   $kvmversion - version string; defaults to kvm_user_version()
#
# Returns the leading '<major>.<minor>' part, or undef if the string does not
# start with such a version (instead of whatever an earlier regex match
# happened to leave behind in $1).
sub get_installed_machine_version {
    my ($kvmversion) = @_;

    $kvmversion = kvm_user_version() if !defined($kvmversion);

    # capture into a lexical: a failed match must not leak a stale $1
    my ($major_minor) = $kvmversion =~ m/^(\d+\.\d+)/;

    return $major_minor;
}
3173
# Turn an unversioned machine type into one pinned to a specific version.
#
# Parameters:
#   $machine      - 'pc', 'q35', 'virt' or a false value (treated like 'pc')
#   $base_version - preferred version to pin to
#   $kvmversion   - QEMU version, passed on to the helpers
#
# $base_version is used if it is defined and can_run_pve_machine_version()
# accepts it; otherwise the version from get_installed_machine_version() is
# used. Unknown machine types are returned unchanged after a warning.
sub windows_get_pinned_machine_version {
    my ($machine, $base_version, $kvmversion) = @_;

    my $can_use_base = defined($base_version)
        && PVE::QemuServer::Machine::can_run_pve_machine_version($base_version, $kvmversion);

    my $pin_version = $base_version;
    $pin_version = get_installed_machine_version($kvmversion) if !$can_use_base;

    return "pc-i440fx-$pin_version" if !$machine || $machine eq 'pc';
    return "pc-q35-$pin_version" if $machine eq 'q35';
    return "virt-$pin_version" if $machine eq 'virt';

    warn "unknown machine type '$machine', not touching that!\n";
    return $machine;
}
3195
# Determine the machine type string to start a VM with.
#
# $conf            - VM config; 'machine' and 'ostype' are read
# $forcemachine    - takes precedence over $conf->{machine} when set
# $arch            - guest architecture, defaults to 'x86_64' when a default machine is needed
# $add_pve_version - when true, make sure the result carries a '+pveN' suffix
# $kvmversion      - QEMU version, queried via kvm_user_version() if needed and not given
sub get_vm_machine {
    my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_;

    my $machine = $forcemachine || $conf->{machine};

    my $is_generic = !$machine || $machine =~ m/^(?:pc|q35|virt)$/;
    if ($is_generic) {
        $kvmversion //= kvm_user_version();

        # we must pin Windows VMs without a specific version to 5.1, as 5.2 fixed a bug in ACPI
        # layout which confuses windows quite a bit and may result in various regressions..
        # see: https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg08484.html
        $machine = windows_get_pinned_machine_version($machine, '5.1', $kvmversion)
            if windows_version($conf->{ostype});

        $arch //= 'x86_64';
        $machine ||= $default_machines->{$arch};

        $machine .= '+pve' . PVE::QemuServer::Machine::get_pve_version($kvmversion)
            if $add_pve_version;
    }

    if ($add_pve_version && $machine !~ m/\+pve\d+?(?:\.pxe)?$/) {
        # for version-pinned machines that do not include a pve-version (e.g.
        # pc-q35-4.1), we assume 0 to keep them stable in case we bump
        if ($machine =~ m/^(.*?)\.pxe$/) {
            # keep '.pxe' as the very last component
            $machine = "$1+pve0.pxe";
        } else {
            $machine .= '+pve0';
        }
    }

    return $machine;
}
3230
# Look up the OVMF image list for a guest.
#
# $arch    - guest architecture, used as key into $OVMF; dies if there is no entry
# $efidisk - parsed efidisk (may be undef); 'efitype' and 'pre-enrolled-keys' are read
# $smm     - picks the '4m' over the '4m-no-smm' image set for 4m EFI disks
#
# Returns the list stored for the selected image type (callers in this file unpack it as
# code image, vars image).
sub get_ovmf_files($$$) {
    my ($arch, $efidisk, $smm) = @_;

    my $images_by_type = $OVMF->{$arch};
    die "no OVMF images known for architecture '$arch'\n" if !$images_by_type;

    my $efitype = $efidisk->{efitype};
    my $is_4m = defined($efitype) && $efitype eq '4m';

    my $type;
    if (!$is_4m) {
        $type = 'default';
    } else {
        $type = $smm ? "4m" : "4m-no-smm";
        $type .= '-ms' if $efidisk->{'pre-enrolled-keys'};
    }

    return @{ $images_by_type->{$type} };
}
3245
# Emulator binary to use for each guest architecture that is not the host's own.
my $Arch2Qemu = {
    aarch64 => '/usr/bin/qemu-system-aarch64',
    x86_64 => '/usr/bin/qemu-system-x86_64',
};

# Return the path of the QEMU binary to run a guest of architecture $arch with: '/usr/bin/kvm'
# for the native architecture, the matching emulator otherwise. Dies for unknown architectures.
sub get_command_for_arch($) {
    my ($arch) = @_;

    if (is_native($arch)) {
        return '/usr/bin/kvm';
    }

    my $binary = $Arch2Qemu->{$arch};
    die "don't know how to emulate architecture '$arch'\n" if !$binary;

    return $binary;
}
3258
3259 # To use query_supported_cpu_flags and query_understood_cpu_flags to get flags
3260 # to use in a QEMU command line (-cpu element), first array_intersect the result
3261 # of query_supported_ with query_understood_. This is necessary because:
3262 #
3263 # a) query_understood_ returns flags the host cannot use and
3264 # b) query_supported_ (rather the QMP call) doesn't actually return CPU
3265 # flags, but CPU settings - with most of them being flags. Those settings
3266 # (and some flags, curiously) cannot be specified as a "-cpu" argument.
3267 #
3268 # query_supported_ needs to start up to 2 temporary VMs and is therefore rather
3269 # expensive. If you need the value returned from this, you can get it much
3270 # cheaper from pmxcfs using PVE::Cluster::get_node_kv('cpuflags-$accel') with
3271 # $accel being 'kvm' or 'tcg'.
3272 #
3273 # pvestatd calls this function on startup and whenever the QEMU/KVM version
3274 # changes, automatically populating pmxcfs.
3275 #
3276 # Returns: { kvm => [ flagX, flagY, ... ], tcg => [ flag1, flag2, ... ] }
3277 # since kvm and tcg machines support different flags
3278 #
sub query_supported_cpu_flags {
    my ($arch) = @_;

    $arch //= get_host_arch();
    my $default_machine = $default_machines->{$arch};

    # FIXME: Once this is merged, the code below should work for ARM as well:
    # https://lists.nongnu.org/archive/html/qemu-devel/2019-06/msg04947.html
    if ($arch eq "aarch64") {
        die "QEMU/KVM cannot detect CPU flags on ARM (aarch64)\n";
    }

    my $kvm_supported = defined(kvm_version());
    my $qemu_cmd = get_command_for_arch($arch);
    my $fakevmid = -1;
    my $pidfile = PVE::QemuServer::Helpers::pidfile_name($fakevmid);

    # Start a temporary (frozen) VM with vmid -1 to allow sending a QMP command, and return the
    # sorted list of flags it reports as enabled for the 'host' CPU model.
    my $query_supported_run_qemu = sub {
        my ($kvm) = @_;

        my @qemu_argv = (
            $qemu_cmd,
            '-machine', $default_machine,
            '-display', 'none',
            '-chardev', "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server=on,wait=off",
            '-mon', 'chardev=qmp,mode=control',
            '-pidfile', $pidfile,
            '-S', '-daemonize',
        );
        push @qemu_argv, '-accel', 'tcg' if !$kvm;

        my $rc = run_command(\@qemu_argv, noerr => 1, quiet => 0);
        die "QEMU flag querying VM exited with code " . $rc if $rc;

        my %enabled;
        eval {
            my $expansion = mon_cmd(
                $fakevmid,
                'query-cpu-model-expansion',
                type => 'full',
                model => { name => 'host' }
            );

            my $props = $expansion->{model}->{props};
            for my $name (keys %$props) {
                next if $props->{$name} ne '1';
                # QEMU returns some flags multiple times, with '_', '.' or '-'
                # (e.g. lahf_lm and lahf-lm; sse4.2, sse4-2 and sse4_2; ...).
                # We only keep those with underscores, to match /proc/cpuinfo
                (my $flag = $name) =~ tr/.-/_/;
                $enabled{$flag} = 1;
            }
        };
        my $err = $@;

        # force stop with 10 sec timeout and 'nocheck', always stop, even if QMP failed
        vm_stop(undef, $fakevmid, 1, 1, 10, 0, 1);

        die $err if $err;

        return [ sort keys %enabled ];
    };

    my %flags;

    # We need to query QEMU twice, since KVM and TCG have different supported flags
    PVE::QemuConfig->lock_config($fakevmid, sub {
        $flags{tcg} = eval { $query_supported_run_qemu->(0) };
        warn "warning: failed querying supported tcg flags: $@\n" if $@;

        if ($kvm_supported) {
            $flags{kvm} = eval { $query_supported_run_qemu->(1) };
            warn "warning: failed querying supported kvm flags: $@\n" if $@;
        }
    });

    return \%flags;
}
3360
# Understood CPU flags are written to a file at 'pve-qemu' compile time
my $understood_cpu_flag_dir = "/usr/share/kvm";

# Read the whitespace-separated flag list recorded for the host architecture and return it as
# an array ref. Dies if the file for this architecture does not exist.
sub query_understood_cpu_flags {
    my $host_arch = get_host_arch();
    my $flag_file = "$understood_cpu_flag_dir/recognized-CPUID-flags-$host_arch";

    if (! -e $flag_file) {
        die "Cannot query understood QEMU CPU flags for architecture: $host_arch (file not found)\n";
    }

    my $content = file_get_contents($flag_file);

    # split on ' ' skips leading whitespace and splits on runs of whitespace; trailing empty
    # fields are dropped, so surrounding whitespace never produces empty entries
    my @flags = split(' ', $content);

    return \@flags;
}
3376
# CPU weight for a VM: the configured 'cpuunits', else 100 when cgroup_mode() reports 2 and
# 1024 otherwise.
my sub get_cpuunits {
    my ($conf) = @_;

    return $conf->{cpuunits} if defined($conf->{cpuunits});

    my $is_cgroup_v2 = PVE::CGroup::cgroup_mode() == 2;
    return $is_cgroup_v2 ? 100 : 1024;
}
# Build the QEMU command line for starting VM $vmid from its config $conf.
#
# Parameters:
#   $storecfg     - storage configuration, handed to PVE::Storage and the drive helpers
#   $vmid         - ID of the VM
#   $conf         - VM configuration hash
#   $defaults     - fallback values; read here for 'tablet', 'tdf' and 'scsihw'
#   $forcemachine - if set, used instead of $conf->{machine} (see get_vm_machine)
#   $forcecpu     - if set, passed verbatim as the '-cpu' argument
#   $pbs_backing  - optional hash keyed by drive name; a '-blockdev' is added for each drive
#                   that has an entry
#
# Returns ($cmd, $vollist, $spice_port) in list context and only $cmd in scalar context:
#   $cmd        - array ref: the binary followed by all of its arguments
#   $vollist    - array ref of the storage volume IDs referenced by the config
#   $spice_port - port picked for SPICE; stays undef unless the VGA type is qxl
#
# Dies, among other checks, if the QEMU binary is too old for the machine type, if KVM is
# requested but not available, or if a configured serial/parallel device does not exist.
3381 sub config_to_command {
3382 my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu,
3383 $pbs_backing) = @_;
3384
# Arguments are collected in two lists: general options go to $cmd directly, device related
# ones to $devices, which is appended to $cmd near the end. The *Flags lists are each joined
# into a single '-global', '-machine' or '-rtc' argument.
3385 my $cmd = [];
3386 my ($globalFlags, $machineFlags, $rtcFlags) = ([], [], []);
3387 my $devices = [];
3388 my $bridges = {};
3389 my $ostype = $conf->{ostype};
3390 my $winversion = windows_version($ostype);
3391 my $kvm = $conf->{kvm};
3392 my $nodename = nodename();
3393
3394 my $arch = get_vm_arch($conf);
3395 my $kvm_binary = get_command_for_arch($arch);
3396 my $kvmver = kvm_user_version($kvm_binary);
3397
3398 if (!$kvmver || $kvmver !~ m/^(\d+)\.(\d+)/ || $1 < 3) {
3399 $kvmver //= "undefined";
3400 die "Detected old QEMU binary ('$kvmver', at least 3.0 is required)\n";
3401 }
3402
3403 my $add_pve_version = min_version($kvmver, 4, 1);
3404
3405 my $machine_type = get_vm_machine($conf, $forcemachine, $arch, $add_pve_version);
3406 my $machine_version = extract_version($machine_type, $kvmver);
# an unset 'kvm' option defaults to enabled only when the guest architecture is the host's
3407 $kvm //= 1 if is_native($arch);
3408
3409 $machine_version =~ m/(\d+)\.(\d+)/;
3410 my ($machine_major, $machine_minor) = ($1, $2);
3411
# a third version component >= 90 is treated as a release candidate
3412 if ($kvmver =~ m/^\d+\.\d+\.(\d+)/ && $1 >= 90) {
3413 warn "warning: Installed QEMU version ($kvmver) is a release candidate, ignoring version checks\n";
3414 } elsif (!min_version($kvmver, $machine_major, $machine_minor)) {
3415 die "Installed QEMU version '$kvmver' is too old to run machine type '$machine_type',"
3416 ." please upgrade node '$nodename'\n"
3417 } elsif (!PVE::QemuServer::Machine::can_run_pve_machine_version($machine_version, $kvmver)) {
3418 my $max_pve_version = PVE::QemuServer::Machine::get_pve_version($machine_version);
3419 die "Installed qemu-server (max feature level for $machine_major.$machine_minor is"
3420 ." pve$max_pve_version) is too old to run machine type '$machine_type', please upgrade"
3421 ." node '$nodename'\n";
3422 }
3423
3424 # if a specific +pve version is required for a feature, use $version_guard
3425 # instead of min_version to allow machines to be run with the minimum
3426 # required version
# $required_pve_version records the highest +pve level any guarded feature asked for; it is
# used for the 'type=' machine flag at the end.
3427 my $required_pve_version = 0;
3428 my $version_guard = sub {
3429 my ($major, $minor, $pve) = @_;
3430 return 0 if !min_version($machine_version, $major, $minor, $pve);
3431 my $max_pve = PVE::QemuServer::Machine::get_pve_version("$major.$minor");
3432 return 1 if min_version($machine_version, $major, $minor, $max_pve+1);
3433 $required_pve_version = $pve if $pve && $pve > $required_pve_version;
3434 return 1;
3435 };
3436
3437 if ($kvm && !defined kvm_version()) {
3438 die "KVM virtualisation configured, but not available. Either disable in VM configuration"
3439 ." or enable in BIOS.\n";
3440 }
3441
3442 my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
3443 my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
3444 my $use_old_bios_files = undef;
3445 ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);
3446
# NOTE(review): $cpuunits is not read again anywhere in this sub
3447 my $cpuunits = get_cpuunits($conf);
3448
3449 push @$cmd, $kvm_binary;
3450
3451 push @$cmd, '-id', $vmid;
3452
3453 my $vmname = $conf->{name} || "vm$vmid";
3454
3455 push @$cmd, '-name', $vmname;
3456
3457 push @$cmd, '-no-shutdown';
3458
# set below when a virtio disk or virtio network device is found
3459 my $use_virtio = 0;
3460
3461 my $qmpsocket = PVE::QemuServer::Helpers::qmp_socket($vmid);
3462 push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server=on,wait=off";
3463 push @$cmd, '-mon', "chardev=qmp,mode=control";
3464
3465 if (min_version($machine_version, 2, 12)) {
3466 push @$cmd, '-chardev', "socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5";
3467 push @$cmd, '-mon', "chardev=qmp-event,mode=control";
3468 }
3469
3470 push @$cmd, '-pidfile' , PVE::QemuServer::Helpers::pidfile_name($vmid);
3471
3472 push @$cmd, '-daemonize';
3473
3474 if ($conf->{smbios1}) {
3475 my $smbios_conf = parse_smbios1($conf->{smbios1});
3476 if ($smbios_conf->{base64}) {
3477 # Do not pass base64 flag to qemu
3478 delete $smbios_conf->{base64};
3479 my $smbios_string = "";
3480 foreach my $key (keys %$smbios_conf) {
3481 my $value;
# the uuid is passed through as-is, every other field is base64-decoded first
3482 if ($key eq "uuid") {
3483 $value = $smbios_conf->{uuid}
3484 } else {
3485 $value = decode_base64($smbios_conf->{$key});
3486 }
3487 # qemu accepts any binary data, only commas need escaping by double comma
3488 $value =~ s/,/,,/g;
3489 $smbios_string .= "," . $key . "=" . $value if $value;
3490 }
3491 push @$cmd, '-smbios', "type=1" . $smbios_string;
3492 } else {
3493 push @$cmd, '-smbios', "type=1,$conf->{smbios1}";
3494 }
3495 }
3496
# OVMF (UEFI) firmware: one read-only pflash drive for the code image and a second pflash
# drive for the variable store (the efidisk, or a temporary copy of the vars template)
3497 if ($conf->{bios} && $conf->{bios} eq 'ovmf') {
3498 my $d;
3499 if (my $efidisk = $conf->{efidisk0}) {
3500 $d = parse_drive('efidisk0', $efidisk);
3501 }
3502
3503 my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch, $d, $q35);
3504 die "uefi base image '$ovmf_code' not found\n" if ! -f $ovmf_code;
3505
3506 my ($path, $format);
3507 my $read_only_str = '';
3508 if ($d) {
3509 my ($storeid, $volname) = PVE::Storage::parse_volume_id($d->{file}, 1);
3510 $format = $d->{format};
3511 if ($storeid) {
3512 $path = PVE::Storage::path($storecfg, $d->{file});
3513 if (!defined($format)) {
3514 my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
3515 $format = qemu_img_format($scfg, $volname);
3516 }
3517 } else {
3518 $path = $d->{file};
3519 die "efidisk format must be specified\n"
3520 if !defined($format);
3521 }
3522
3523 $read_only_str = ',readonly=on' if drive_is_read_only($conf, $d);
3524 } else {
3525 warn "no efidisk configured! Using temporary efivars disk.\n";
# NOTE(review): fixed, predictable file name below /tmp
3526 $path = "/tmp/$vmid-ovmf.fd";
3527 PVE::Tools::file_copy($ovmf_vars, $path, -s $ovmf_vars);
3528 $format = 'raw';
3529 }
3530
3531 my $size_str = "";
3532
3533 if ($format eq 'raw' && $version_guard->(4, 1, 2)) {
3534 $size_str = ",size=" . (-s $ovmf_vars);
3535 }
3536
3537 # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
3538 my $cache = "";
3539 if ($path =~ m/^rbd:/) {
3540 $cache = ',cache=writeback';
3541 $path .= ':rbd_cache_policy=writeback'; # avoid write-around, we *need* to cache writes too
3542 }
3543
3544 push @$cmd, '-drive', "if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code";
3545 push @$cmd, '-drive', "if=pflash,unit=1$cache,format=$format,id=drive-efidisk0$size_str,file=${path}${read_only_str}";
3546 }
3547
3548 if ($q35) { # tell QEMU to load q35 config early
3549 # we use different pcie-port hardware for qemu >= 4.0 for passthrough
3550 if (min_version($machine_version, 4, 0)) {
3551 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35-4.0.cfg';
3552 } else {
3553 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35.cfg';
3554 }
3555 }
3556
3557 if ($conf->{vmgenid}) {
3558 push @$devices, '-device', 'vmgenid,guid='.$conf->{vmgenid};
3559 }
3560
3561 # add usb controllers
3562 my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers(
3563 $conf, $bridges, $arch, $machine_type, $usbdesc->{format}, $MAX_USB_DEVICES);
3564 push @$devices, @usbcontrollers if @usbcontrollers;
3565 my $vga = parse_vga($conf->{vga});
3566
# $qxlnum is 0 for non-qxl displays, else the qxl variant number (see vga_conf_has_spice);
# all qxl variants are handled with type 'qxl' from here on
3567 my $qxlnum = vga_conf_has_spice($conf->{vga});
3568 $vga->{type} = 'qxl' if $qxlnum;
3569
# no display type configured: pick a default based on architecture, machine and Windows version
3570 if (!$vga->{type}) {
3571 if ($arch eq 'aarch64') {
3572 $vga->{type} = 'virtio';
3573 } elsif (min_version($machine_version, 2, 9)) {
3574 $vga->{type} = (!$winversion || $winversion >= 6) ? 'std' : 'cirrus';
3575 } else {
3576 $vga->{type} = ($winversion >= 6) ? 'std' : 'cirrus';
3577 }
3578 }
3579
3580 # enable absolute mouse coordinates (needed by vnc)
3581 my $tablet = $conf->{tablet};
3582 if (!defined($tablet)) {
3583 $tablet = $defaults->{tablet};
3584 $tablet = 0 if $qxlnum; # disable for spice because it is not needed
3585 $tablet = 0 if $vga->{type} =~ m/^serial\d+$/; # disable if we use serial terminal (no vga card)
3586 }
3587
3588 if ($tablet) {
# NOTE(review): the trailing 'if $tablet' is redundant inside this block
3589 push @$devices, '-device', print_tabletdevice_full($conf, $arch) if $tablet;
3590 my $kbd = print_keyboarddevice_full($conf, $arch);
3591 push @$devices, '-device', $kbd if defined($kbd);
3592 }
3593
3594 my $bootorder = device_bootorder($conf);
3595
3596 # host pci device passthrough
3597 my ($kvm_off, $gpu_passthrough, $legacy_igd) = PVE::QemuServer::PCI::print_hostpci_devices(
3598 $vmid, $conf, $devices, $vga, $winversion, $q35, $bridges, $arch, $machine_type, $bootorder);
3599
3600 # usb devices
3601 my $usb_dev_features = {};
3602 $usb_dev_features->{spice_usb3} = 1 if min_version($machine_version, 4, 0);
3603
3604 my @usbdevices = PVE::QemuServer::USB::get_usb_devices(
3605 $conf, $usbdesc->{format}, $MAX_USB_DEVICES, $usb_dev_features, $bootorder);
3606 push @$devices, @usbdevices if @usbdevices;
3607
3608 # serial devices
3609 for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
3610 my $path = $conf->{"serial$i"} or next;
3611 if ($path eq 'socket') {
3612 my $socket = "/var/run/qemu-server/${vmid}.serial$i";
3613 push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server=on,wait=off";
3614 # On aarch64, serial0 is the UART device. Qemu only allows
3615 # connecting UART devices via the '-serial' command line, as
3616 # the device has a fixed slot on the hardware...
3617 if ($arch eq 'aarch64' && $i == 0) {
3618 push @$devices, '-serial', "chardev:serial$i";
3619 } else {
3620 push @$devices, '-device', "isa-serial,chardev=serial$i";
3621 }
3622 } else {
# anything but 'socket' must be the path of an existing character device
3623 die "no such serial device\n" if ! -c $path;
3624 push @$devices, '-chardev', "tty,id=serial$i,path=$path";
3625 push @$devices, '-device', "isa-serial,chardev=serial$i";
3626 }
3627 }
3628
3629 # parallel devices
3630 for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
3631 if (my $path = $conf->{"parallel$i"}) {
3632 die "no such parallel device\n" if ! -c $path;
3633 my $devtype = $path =~ m!^/dev/usb/lp! ? 'tty' : 'parport';
3634 push @$devices, '-chardev', "$devtype,id=parallel$i,path=$path";
3635 push @$devices, '-device', "isa-parallel,chardev=parallel$i";
3636 }
3637 }
3638
3639 if (min_version($machine_version, 4, 0) && (my $audio = conf_has_audio($conf))) {
3640 my $audiopciaddr = print_pci_addr("audio0", $bridges, $arch, $machine_type);
3641 my $audio_devs = audio_devs($audio, $audiopciaddr, $machine_version);
3642 push @$devices, @$audio_devs;
3643 }
3644
3645 add_tpm_device($vmid, $devices, $conf);
3646
3647 my $sockets = 1;
3648 $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer used
3649 $sockets = $conf->{sockets} if $conf->{sockets};
3650
3651 my $cores = $conf->{cores} || 1;
3652
3653 my $maxcpus = $sockets * $cores;
3654
3655 my $vcpus = $conf->{vcpus} ? $conf->{vcpus} : $maxcpus;
3656
3657 my $allowed_vcpus = $cpuinfo->{cpus};
3658
3659 die "MAX $allowed_vcpus vcpus allowed per VM on this node\n" if ($allowed_vcpus < $maxcpus);
3660
# with CPU hotplug, start with a single vCPU in '-smp' and add vCPUs 2..$vcpus as separate
# '-device' entries
3661 if ($hotplug_features->{cpu} && min_version($machine_version, 2, 7)) {
3662 push @$cmd, '-smp', "1,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3663 for (my $i = 2; $i <= $vcpus; $i++) {
3664 my $cpustr = print_cpu_device($conf,$i);
3665 push @$cmd, '-device', $cpustr;
3666 }
3667
3668 } else {
3669
3670 push @$cmd, '-smp', "$vcpus,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3671 }
3672 push @$cmd, '-nodefaults';
3673
3674 push @$cmd, '-boot', "menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg";
3675
3676 push @$cmd, '-no-acpi' if defined($conf->{acpi}) && $conf->{acpi} == 0;
3677
3678 push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;
3679
# a real display gets a VGA device plus a VNC unix socket; 'none' and serial terminals run
# with '-nographic'
3680 if ($vga->{type} && $vga->{type} !~ m/^serial\d+$/ && $vga->{type} ne 'none'){
3681 push @$devices, '-device', print_vga_device(
3682 $conf, $vga, $arch, $machine_version, $machine_type, undef, $qxlnum, $bridges);
3683 my $socket = PVE::QemuServer::Helpers::vnc_socket($vmid);
3684 push @$cmd, '-vnc', "unix:$socket,password=on";
3685 } else {
3686 push @$cmd, '-vga', 'none' if $vga->{type} eq 'none';
3687 push @$cmd, '-nographic';
3688 }
3689
3690 # time drift fix
3691 my $tdf = defined($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};
3692 my $useLocaltime = $conf->{localtime};
3693
3694 if ($winversion >= 5) { # windows
3695 $useLocaltime = 1 if !defined($conf->{localtime});
3696
3697 # use time drift fix when acpi is enabled
3698 if (!(defined($conf->{acpi}) && $conf->{acpi} == 0)) {
3699 $tdf = 1 if !defined($conf->{tdf});
3700 }
3701 }
3702
3703 if ($winversion >= 6) {
3704 push @$globalFlags, 'kvm-pit.lost_tick_policy=discard';
3705 push @$cmd, '-no-hpet';
3706 }
3707
3708 push @$rtcFlags, 'driftfix=slew' if $tdf;
3709
3710 if ($conf->{startdate} && $conf->{startdate} ne 'now') {
3711 push @$rtcFlags, "base=$conf->{startdate}";
3712 } elsif ($useLocaltime) {
3713 push @$rtcFlags, 'base=localtime';
3714 }
3715
3716 if ($forcecpu) {
3717 push @$cmd, '-cpu', $forcecpu;
3718 } else {
3719 push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
3720 }
3721
3722 PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd);
3723
3724 push @$cmd, '-S' if $conf->{freeze};
3725
3726 push @$cmd, '-k', $conf->{keyboard} if defined($conf->{keyboard});
3727
3728 my $guest_agent = parse_guest_agent($conf);
3729
3730 if ($guest_agent->{enabled}) {
3731 my $qgasocket = PVE::QemuServer::Helpers::qmp_socket($vmid, 1);
3732 push @$devices, '-chardev', "socket,path=$qgasocket,server=on,wait=off,id=qga0";
3733
# the agent channel is a virtio serial port unless type 'isa' is configured
3734 if (!$guest_agent->{type} || $guest_agent->{type} eq 'virtio') {
3735 my $pciaddr = print_pci_addr("qga0", $bridges, $arch, $machine_type);
3736 push @$devices, '-device', "virtio-serial,id=qga0$pciaddr";
3737 push @$devices, '-device', 'virtserialport,chardev=qga0,name=org.qemu.guest_agent.0';
3738 } elsif ($guest_agent->{type} eq 'isa') {
3739 push @$devices, '-device', "isa-serial,chardev=qga0";
3740 }
3741 }
3742
3743 my $rng = $conf->{rng0} ? parse_rng($conf->{rng0}) : undef;
3744 if ($rng && $version_guard->(4, 1, 2)) {
3745 check_rng_source($rng->{source});
3746
3747 my $max_bytes = $rng->{max_bytes} // $rng_fmt->{max_bytes}->{default};
3748 my $period = $rng->{period} // $rng_fmt->{period}->{default};
3749 my $limiter_str = "";
# a max_bytes of 0 leaves the rate limit options out entirely
3750 if ($max_bytes) {
3751 $limiter_str = ",max-bytes=$max_bytes,period=$period";
3752 }
3753
3754 my $rng_addr = print_pci_addr("rng0", $bridges, $arch, $machine_type);
3755 push @$devices, '-object', "rng-random,filename=$rng->{source},id=rng0";
3756 push @$devices, '-device', "virtio-rng-pci,rng=rng0$limiter_str$rng_addr";
3757 }
3758
3759 my $spice_port;
3760
3761 if ($qxlnum) {
3762 if ($qxlnum > 1) {
# multi-monitor: Windows guests get one additional VGA device per extra monitor, other
# guests a single qxl-vga device with enlarged ram/vram
3763 if ($winversion){
3764 for (my $i = 1; $i < $qxlnum; $i++){
3765 push @$devices, '-device', print_vga_device(
3766 $conf, $vga, $arch, $machine_version, $machine_type, $i, $qxlnum, $bridges);
3767 }
3768 } else {
3769 # assume other OS works like Linux
3770 my ($ram, $vram) = ("134217728", "67108864");
3771 if ($vga->{memory}) {
3772 $ram = PVE::Tools::convert_size($qxlnum*4*$vga->{memory}, 'mb' => 'b');
3773 $vram = PVE::Tools::convert_size($qxlnum*2*$vga->{memory}, 'mb' => 'b');
3774 }
3775 push @$cmd, '-global', "qxl-vga.ram_size=$ram";
3776 push @$cmd, '-global', "qxl-vga.vram_size=$vram";
3777 }
3778 }
3779
3780 my $pciaddr = print_pci_addr("spice", $bridges, $arch, $machine_type);
3781
# resolve 'localhost' in the address family used for the node name; SPICE listens there
3782 my $pfamily = PVE::Tools::get_host_address_family($nodename);
3783 my @nodeaddrs = PVE::Tools::getaddrinfo_all('localhost', family => $pfamily);
3784 die "failed to get an ip address of type $pfamily for 'localhost'\n" if !@nodeaddrs;
3785
3786 push @$devices, '-device', "virtio-serial,id=spice$pciaddr";
3787 push @$devices, '-chardev', "spicevmc,id=vdagent,name=vdagent";
3788 push @$devices, '-device', "virtserialport,chardev=vdagent,name=com.redhat.spice.0";
3789
3790 my $localhost = PVE::Network::addr_to_ip($nodeaddrs[0]->{addr});
3791 $spice_port = PVE::Tools::next_spice_port($pfamily, $localhost);
3792
3793 my $spice_enhancement_str = $conf->{spice_enhancements} // '';
3794 my $spice_enhancement = parse_property_string($spice_enhancements_fmt, $spice_enhancement_str);
3795 if ($spice_enhancement->{foldersharing}) {
3796 push @$devices, '-chardev', "spiceport,id=foldershare,name=org.spice-space.webdav.0";
3797 push @$devices, '-device', "virtserialport,chardev=foldershare,name=org.spice-space.webdav.0";
3798 }
3799
3800 my $spice_opts = "tls-port=${spice_port},addr=$localhost,tls-ciphers=HIGH,seamless-migration=on";
3801 $spice_opts .= ",streaming-video=$spice_enhancement->{videostreaming}"
3802 if $spice_enhancement->{videostreaming};
3803
3804 push @$devices, '-spice', "$spice_opts";
3805 }
3806
3807 # enable balloon by default, unless explicitly disabled
3808 if (!defined($conf->{balloon}) || $conf->{balloon}) {
3809 my $pciaddr = print_pci_addr("balloon0", $bridges, $arch, $machine_type);
3810 push @$devices, '-device', "virtio-balloon-pci,id=balloon0$pciaddr";
3811 }
3812
3813 if ($conf->{watchdog}) {
3814 my $wdopts = parse_watchdog($conf->{watchdog});
3815 my $pciaddr = print_pci_addr("watchdog", $bridges, $arch, $machine_type);
3816 my $watchdog = $wdopts->{model} || 'i6300esb';
3817 push @$devices, '-device', "$watchdog$pciaddr";
3818 push @$devices, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
3819 }
3820
# $scsicontroller / $ahcicontroller remember which controllers were already emitted, so each
# controller device is added only once
3821 my $vollist = [];
3822 my $scsicontroller = {};
3823 my $ahcicontroller = {};
3824 my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : $defaults->{scsihw};
3825
3826 # Add iscsi initiator name if available
3827 if (my $initiator = get_initiator_name()) {
3828 push @$devices, '-iscsi', "initiator-name=$initiator";
3829 }
3830
3831 PVE::QemuConfig->foreach_volume($conf, sub {
3832 my ($ds, $drive) = @_;
3833
# volumes on a PVE storage are recorded in $vollist, including efidisk and tpmstate
3834 if (PVE::Storage::parse_volume_id($drive->{file}, 1)) {
3835 check_volume_storage_type($storecfg, $drive->{file});
3836 push @$vollist, $drive->{file};
3837 }
3838
3839 # ignore efidisk here, already added in bios/fw handling code above
3840 return if $drive->{interface} eq 'efidisk';
3841 # similar for TPM
3842 return if $drive->{interface} eq 'tpmstate';
3843
3844 $use_virtio = 1 if $ds =~ m/^virtio/;
3845
3846 $drive->{bootindex} = $bootorder->{$ds} if $bootorder->{$ds};
3847
3848 if ($drive->{interface} eq 'virtio'){
3849 push @$cmd, '-object', "iothread,id=iothread-$ds" if $drive->{iothread};
3850 }
3851
3852 if ($drive->{interface} eq 'scsi') {
3853
3854 my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
3855
3856 die "scsi$drive->{index}: machine version 4.1~pve2 or higher is required to use more than 14 SCSI disks\n"
3857 if $drive->{index} > 13 && !&$version_guard(4, 1, 2);
3858
3859 my $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges, $arch, $machine_type);
3860 my $scsihw_type = $scsihw =~ m/^virtio-scsi-single/ ? "virtio-scsi-pci" : $scsihw;
3861
3862 my $iothread = '';
3863 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{iothread}){
3864 $iothread .= ",iothread=iothread-$controller_prefix$controller";
3865 push @$cmd, '-object', "iothread,id=iothread-$controller_prefix$controller";
3866 } elsif ($drive->{iothread}) {
3867 warn "iothread is only valid with virtio disk or virtio-scsi-single controller, ignoring\n";
3868 }
3869
3870 my $queues = '';
3871 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{queues}){
3872 $queues = ",num_queues=$drive->{queues}";
3873 }
3874
3875 push @$devices, '-device', "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues"
3876 if !$scsicontroller->{$controller};
3877 $scsicontroller->{$controller}=1;
3878 }
3879
3880 if ($drive->{interface} eq 'sata') {
3881 my $controller = int($drive->{index} / $PVE::QemuServer::Drive::MAX_SATA_DISKS);
3882 my $pciaddr = print_pci_addr("ahci$controller", $bridges, $arch, $machine_type);
3883 push @$devices, '-device', "ahci,id=ahci$controller,multifunction=on$pciaddr"
3884 if !$ahcicontroller->{$controller};
3885 $ahcicontroller->{$controller}=1;
3886 }
3887
3888 my $pbs_conf = $pbs_backing->{$ds};
3889 my $pbs_name = undef;
3890 if ($pbs_conf) {
3891 $pbs_name = "drive-$ds-pbs";
3892 push @$devices, '-blockdev', print_pbs_blockdev($pbs_conf, $pbs_name);
3893 }
3894
3895 my $drive_cmd = print_drive_commandline_full(
3896 $storecfg, $vmid, $drive, $pbs_name, min_version($kvmver, 6, 0));
3897
3898 # extra protection for templates, but SATA and IDE don't support it..
3899 $drive_cmd .= ',readonly=on' if drive_is_read_only($conf, $drive);
3900
3901 push @$devices, '-drive',$drive_cmd;
3902 push @$devices, '-device', print_drivedevice_full(
3903 $storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type);
3904 });
3905
# network devices: one '-netdev' backend plus one '-device' frontend per configured netN
3906 for (my $i = 0; $i < $MAX_NETS; $i++) {
3907 my $netname = "net$i";
3908
3909 next if !$conf->{$netname};
3910 my $d = parse_net($conf->{$netname});
3911 next if !$d;
3912
3913 $use_virtio = 1 if $d->{model} eq 'virtio';
3914
3915 $d->{bootindex} = $bootorder->{$netname} if $bootorder->{$netname};
3916
3917 my $netdevfull = print_netdev_full($vmid, $conf, $arch, $d, $netname);
3918 push @$devices, '-netdev', $netdevfull;
3919
3920 my $netdevicefull = print_netdevice_full(
3921 $vmid, $conf, $d, $netname, $bridges, $use_old_bios_files, $arch, $machine_type);
3922
3923 push @$devices, '-device', $netdevicefull;
3924 }
3925
3926 if ($conf->{ivshmem}) {
3927 my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
3928
3929 my $bus;
3930 if ($q35) {
3931 $bus = print_pcie_addr("ivshmem");
3932 } else {
3933 $bus = print_pci_addr("ivshmem", $bridges, $arch, $machine_type);
3934 }
3935
# the shared memory file is named after the configured ivshmem name, or the VM ID if unset
3936 my $ivshmem_name = $ivshmem->{name} // $vmid;
3937 my $path = '/dev/shm/pve-shm-' . $ivshmem_name;
3938
3939 push @$devices, '-device', "ivshmem-plain,memdev=ivshmem$bus,";
3940 push @$devices, '-object', "memory-backend-file,id=ivshmem,share=on,mem-path=$path"
3941 .",size=$ivshmem->{size}M";
3942 }
3943
3944 # pci.4 is nested in pci.1
3945 $bridges->{1} = 1 if $bridges->{4};
3946
3947 if (!$q35) { # add pci bridges
3948 if (min_version($machine_version, 2, 3)) {
3949 $bridges->{1} = 1;
3950 $bridges->{2} = 1;
3951 }
3952 $bridges->{3} = 1 if $scsihw =~ m/^virtio-scsi-single/;
3953 }
3954
# Bridges are visited in descending key order and each one is inserted in front of the
# previously inserted one, so they end up in ascending order on the command line.
3955 for my $k (sort {$b cmp $a} keys %$bridges) {
3956 next if $q35 && $k < 4; # q35.cfg already includes bridges up to 3
3957
3958 my $k_name = $k;
3959 if ($k == 2 && $legacy_igd) {
3960 $k_name = "$k-igd";
3961 }
3962 my $pciaddr = print_pci_addr("pci.$k_name", undef, $arch, $machine_type);
3963 my $devstr = "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr";
3964
3965 if ($q35) { # add after -readconfig pve-q35.cfg
3966 splice @$devices, 2, 0, '-device', $devstr;
3967 } else {
3968 unshift @$devices, '-device', $devstr if $k > 0;
3969 }
3970 }
3971
3972 if (!$kvm) {
3973 push @$machineFlags, 'accel=tcg';
3974 }
3975
# replace the '+pveN' suffix with the +pve level recorded by $version_guard, i.e. the
# highest one a guarded feature actually required
3976 my $machine_type_min = $machine_type;
3977 if ($add_pve_version) {
3978 $machine_type_min =~ s/\+pve\d+$//;
3979 $machine_type_min .= "+pve$required_pve_version";
3980 }
3981 push @$machineFlags, "type=${machine_type_min}";
3982
3983 push @$cmd, @$devices;
3984 push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
3985 push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
3986 push @$cmd, '-global', join(',', @$globalFlags) if scalar(@$globalFlags);
3987
3988 if (my $vmstate = $conf->{vmstate}) {
3989 my $statepath = PVE::Storage::path($storecfg, $vmstate);
3990 push @$vollist, $vmstate;
3991 push @$cmd, '-loadstate', $statepath;
3992 print "activating and using '$vmstate' as vmstate\n";
3993 }
3994
3995 if (PVE::QemuConfig->is_template($conf)) {
3996 # needed to workaround base volumes being read-only
3997 push @$cmd, '-snapshot';
3998 }
3999
4000 # add custom args
# they come last, after all generated arguments
4001 if ($conf->{args}) {
4002 my $aa = PVE::Tools::split_args($conf->{args});
4003 push @$cmd, @$aa;
4004 }
4005
4006 return wantarray ? ($cmd, $vollist, $spice_port) : $cmd;
4007 }
4008
# Validate the host file backing a VirtIO RNG device.
#
# Dies if $source does not exist, or if it is '/dev/hwrng' while the kernel reports 'none' as
# the current hardware RNG.
sub check_rng_source {
    my ($source) = @_;

    # mostly relevant for /dev/hwrng, but doesn't hurt to check others too
    if (! -e $source) {
        die "cannot create VirtIO RNG device: source file '$source' doesn't exist\n";
    }

    my $rng_current = '/sys/devices/virtual/misc/hw_random/rng_current';
    my $hwrng_without_backend = $source eq '/dev/hwrng'
        && file_read_firstline($rng_current) eq 'none';

    # Needs to abort, otherwise QEMU crashes on first rng access. Note that rng_current cannot
    # be changed to 'none' manually, so once the VM is past this point, it's no longer an issue.
    die "Cannot start VM with passed-through RNG device: '/dev/hwrng' exists, but"
        ." '$rng_current' is set to 'none'. Ensure that a compatible hardware-RNG is attached"
        ." to the host.\n" if $hwrng_without_backend;
}
4025
# Ask the running VM for its SPICE port via 'query-spice'.
#
# Returns the TLS port if set, else the plain port; dies if neither is set.
sub spice_port {
    my ($vmid) = @_;

    my $spice_info = mon_cmd($vmid, 'query-spice');

    for my $key ('tls-port', 'port') {
        my $port = $spice_info->{$key};
        return $port if $port;
    }

    die "no spice port\n";
}
4033
# Collect the IDs of devices currently present in a running VM.
#
# Queries PCI devices (descending into PCI bridges), block devices named 'drive-<id>', the
# tablet pointing device and 'usbN' peripherals over QMP.
#
# Returns a hash ref keyed by device ID. Values are true; for a PCI bridge the number of
# devices directly behind it is added to its value.
sub vm_devices_list {
    my ($vmid) = @_;

    my %present;

    # walk the PCI tree level by level, bridges contribute their children to the next level
    my $pci_buses = mon_cmd($vmid, 'query-pci');
    my @pending;
    push @pending, @{$_->{devices}} for @$pci_buses;

    while (@pending) {
        my @next_level;
        for my $dev (@pending) {
            my $id = $dev->{qdev_id};
            $present{$id} = 1 if $id;

            my $bridge = $dev->{pci_bridge} or next;

            $present{$id} += scalar(@{$bridge->{devices}});
            push @next_level, @{$bridge->{devices}};
        }
        @pending = @next_level;
    }

    my $blocks = mon_cmd($vmid, 'query-block');
    for my $blk (@$blocks) {
        $present{$1} = 1 if $blk->{device} =~ m/^drive-(\S+)/;
    }

    my $mice = mon_cmd($vmid, 'query-mice');
    for my $mouse (@$mice) {
        next if $mouse->{name} ne 'QEMU HID Tablet';
        $present{tablet} = 1;
        last;
    }

    # for usb devices there is no query-usb
    # but we can iterate over the entries in
    # qom-list path=/machine/peripheral
    my $peripherals = mon_cmd($vmid, 'qom-list', path => '/machine/peripheral');
    for my $entry (@$peripherals) {
        my $name = $entry->{name};
        $present{$name} = 1 if $name =~ m/^usb\d+$/;
    }

    return \%present;
}
4083
# Hot-plug the device $deviceid into the running VM $vmid.
#
# $device is the parsed device description (its use depends on the device
# kind), $arch and $machine_type are forwarded to the helpers that build the
# QEMU device strings.
#
# Returns 1 if the device is already present or was plugged. Returns nothing
# if qemu_netdevadd() reports failure for a network device. Dies for device
# IDs that cannot be hot-plugged or when a monitor helper fails.
sub vm_deviceplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    my $present = vm_devices_list($vmid);
    return 1 if defined($present->{$deviceid});

    # add PCI bridge if we need it for the device
    qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid, $arch, $machine_type);

    # roll back an already added drive, then propagate the original error
    my $drop_drive_and_rethrow = sub {
        my ($err) = @_;
        eval { qemu_drivedel($vmid, $deviceid); };
        warn $@ if $@;
        die $err;
    };

    if ($deviceid eq 'tablet') {
        my $tablet = print_tabletdevice_full($conf, $arch);
        qemu_deviceadd($vmid, $tablet);
    } elsif ($deviceid eq 'keyboard') {
        my $keyboard = print_keyboarddevice_full($conf, $arch);
        qemu_deviceadd($vmid, $keyboard);
    } elsif ($deviceid =~ m/^usb(\d+)$/) {
        die "usb hotplug currently not reliable\n";
        # since we can't reliably hot unplug all added usb devices and usb
        # passthrough breaks live migration we disable usb hotplugging for now
        #qemu_deviceadd($vmid, PVE::QemuServer::USB::print_usbdevice_full($conf, $deviceid, $device));
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        qemu_iothread_add($vmid, $deviceid, $device);
        qemu_driveadd($storecfg, $vmid, $device);

        my $drive_device = print_drivedevice_full(
            $storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);
        qemu_deviceadd($vmid, $drive_device);

        eval { qemu_deviceaddverify($vmid, $deviceid); };
        $drop_drive_and_rethrow->($@) if $@;
    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        my $scsihw = $conf->{scsihw} // "lsi";
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);

        my $scsihw_type = $scsihw;
        $scsihw_type = "virtio-scsi-pci" if $scsihw eq 'virtio-scsi-single';

        my $controller = "$scsihw_type,id=$deviceid$pciaddr";

        # iothread and queue settings only apply to virtioscsiN controllers
        if ($deviceid =~ m/^virtioscsi(\d+)$/) {
            if ($device->{iothread}) {
                qemu_iothread_add($vmid, $deviceid, $device);
                $controller .= ",iothread=iothread-$deviceid";
            }
            if ($device->{queues}) {
                $controller .= ",num_queues=$device->{queues}";
            }
        }

        qemu_deviceadd($vmid, $controller);
        qemu_deviceaddverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        qemu_findorcreatescsihw($storecfg, $conf, $vmid, $device, $arch, $machine_type);
        qemu_driveadd($storecfg, $vmid, $device);

        my $drive_device = print_drivedevice_full(
            $storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);

        eval { qemu_deviceadd($vmid, $drive_device); };
        $drop_drive_and_rethrow->($@) if $@;
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        return if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid);

        # the NIC device string is built for the PXE machine type, not for
        # the machine type passed in by the caller
        my $pxe_machine_type = PVE::QemuServer::Machine::qemu_machine_pxe($vmid, $conf);
        my $use_old_bios_files = undef;
        ($use_old_bios_files, $pxe_machine_type) = qemu_use_old_bios_files($pxe_machine_type);

        my $nic_device = print_netdevice_full(
            $vmid, $conf, $device, $deviceid, undef, $use_old_bios_files, $arch, $pxe_machine_type);
        qemu_deviceadd($vmid, $nic_device);

        eval {
            qemu_deviceaddverify($vmid, $deviceid);
            qemu_set_link_status($vmid, $deviceid, !$device->{link_down});
        };
        if (my $err = $@) {
            # remove the backend again if the frontend did not show up
            eval { qemu_netdevdel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif (!$q35 && $deviceid =~ m/^(pci\.)(\d+)$/) {
        my $bridgeid = $2;
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);

        qemu_deviceadd($vmid, "pci-bridge,id=pci.$bridgeid,chassis_nr=$bridgeid$pciaddr");
        qemu_deviceaddverify($vmid, $deviceid);
    } else {
        die "can't hotplug device '$deviceid'\n";
    }

    return 1;
}
4178
# fixme: this should raise exceptions on error!
#
# Hot-unplug the device $deviceid from the running VM $vmid.
#
# Returns 1 if the device is not present (nothing to do) or was unplugged.
# Dies if $deviceid is one of the boot disks of $conf, if the device kind
# cannot be unplugged, or when a monitor helper fails.
sub vm_deviceunplug {
    my ($vmid, $conf, $deviceid) = @_;

    my $present = vm_devices_list($vmid);
    return 1 if !defined($present->{$deviceid});

    my $bootdisks = PVE::QemuServer::Drive::get_bootdisks($conf);
    for my $bootdisk (@$bootdisks) {
        die "can't unplug bootdisk '$deviceid'\n" if $bootdisk eq $deviceid;
    }

    if ($deviceid eq 'tablet' || $deviceid eq 'keyboard') {
        qemu_devicedel($vmid, $deviceid);
    } elsif ($deviceid =~ m/^usb\d+$/) {
        die "usb hotplug currently not reliable\n";
        # when unplugging usb devices this way, there may be remaining usb
        # controllers/hubs so we disable it for now
        #qemu_devicedel($vmid, $deviceid);
        #qemu_devicedelverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        # frontend first, then the backing drive and its iothread object
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_iothread_del($vmid, $deviceid, $drive);
    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        qemu_devicedel($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_deletescsihw($conf, $vmid, $deviceid);

        # with virtio-scsi-single the iothread is named after the controller
        if ($conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single')) {
            qemu_iothread_del($vmid, "virtioscsi$drive->{index}", $drive);
        }
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_netdevdel($vmid, $deviceid);
    } else {
        die "can't unplug device '$deviceid'\n";
    }

    return 1;
}
4226
# Issue the 'device_add' monitor command for a device given in QEMU command
# line syntax ("<driver>,key=value,...").
#
# The string is prefixed with "driver=" and split on '=' and ',' into the
# option hash passed to the monitor, so keys and values must not contain
# those characters themselves.
sub qemu_deviceadd {
    my ($vmid, $devicefull) = @_;

    my %options = split(/[=,]/, "driver=" . $devicefull);

    return mon_cmd($vmid, "device_add", %options);
}
4235
# Issue the 'device_del' monitor command for device $deviceid and return the
# monitor's answer.
sub qemu_devicedel {
    my ($vmid, $deviceid) = @_;

    my $answer = mon_cmd($vmid, "device_del", id => $deviceid);

    return $answer;
}
4241
# Create the "iothread-$deviceid" object in the running VM if $device has
# its 'iothread' flag set and the object is not listed yet.
sub qemu_iothread_add {
    my ($vmid, $deviceid, $device) = @_;

    return if !$device->{iothread};

    my $object_id = "iothread-$deviceid";
    my $existing = vm_iothreads_list($vmid);

    qemu_objectadd($vmid, $object_id, "iothread") if !$existing->{$object_id};
}
4250
# Remove the "iothread-$deviceid" object from the running VM if $device has
# its 'iothread' flag set and the object is currently listed.
sub qemu_iothread_del {
    my ($vmid, $deviceid, $device) = @_;

    return if !$device->{iothread};

    my $object_id = "iothread-$deviceid";
    my $existing = vm_iothreads_list($vmid);

    qemu_objectdel($vmid, $object_id) if $existing->{$object_id};
}
4259
# Issue the 'object-add' monitor command, creating an object of QOM type
# $qomtype with the ID $objectid. Always returns 1.
sub qemu_objectadd {
    my ($vmid, $objectid, $qomtype) = @_;

    my %object = (id => $objectid, "qom-type" => $qomtype);
    mon_cmd($vmid, "object-add", %object);

    return 1;
}
4267
# Issue the 'object-del' monitor command for the object with the ID
# $objectid. Always returns 1.
sub qemu_objectdel {
    my ($vmid, $objectid) = @_;

    my %object = (id => $objectid);
    mon_cmd($vmid, "object-del", %object);

    return 1;
}
4275
# Add the drive backend described by $device to the running VM using the
# human monitor command 'drive_add'.
#
# io_uring is passed to the drive string builder as enabled when the running
# QEMU version is at least 6.0. Returns 1 on success, dies with the monitor
# output otherwise.
sub qemu_driveadd {
    my ($storecfg, $vmid, $device) = @_;

    my $running_version = get_running_qemu_version($vmid);
    my $use_io_uring = min_version($running_version, 6, 0);

    my $drive = print_drive_commandline_full($storecfg, $vmid, $device, undef, $use_io_uring);
    # double every backslash, the drive string is embedded in a quoted HMP argument
    $drive =~ s/\\/\\\\/g;

    my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_add auto \"$drive\"");

    # If the command succeeds qemu prints: "OK"
    if ($output =~ m/OK/s) {
        return 1;
    }

    die "adding drive failed: $output\n";
}
4290
# Remove the drive backend "drive-$deviceid" from the running VM using the
# human monitor command 'drive_del'.
#
# Returns 1 if the monitor prints nothing (besides leading whitespace) or
# reports that the device was not found; dies with the output otherwise.
sub qemu_drivedel {
    my ($vmid, $deviceid) = @_;

    my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_del drive-$deviceid");
    $output =~ s/^\s+//;

    if ($output eq "") {
        return 1;
    }

    # NB: device not found errors mean the drive was auto-deleted and we ignore the error
    if ($output =~ m/Device \'.*?\' not found/s) {
        return 1;
    }

    die "deleting drive $deviceid failed : $output\n";
}
4304
# Poll the device list of VM $vmid until $deviceid shows up.
#
# Checks up to six times, sleeping one second after every unsuccessful
# check. Returns 1 once the device is listed, dies otherwise.
sub qemu_deviceaddverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if defined($present->{$deviceid});
        sleep 1;
    }

    die "error on hotplug device '$deviceid'\n";
}
4316
4317
# Poll the device list of VM $vmid until $deviceid has disappeared.
#
# Checks up to six times, sleeping one second after every unsuccessful
# check. Returns 1 once the device is gone, dies otherwise.
sub qemu_devicedelverify {
    my ($vmid, $deviceid) = @_;

    # need to verify that the device is correctly removed as device_del
    # is async and empty return is not reliable
    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if !defined($present->{$deviceid});
        sleep 1;
    }

    die "error on hot-unplugging device '$deviceid'\n";
}
4332
# Make sure the SCSI controller that scsihw_infos() assigns to the drive
# $device exists in the running VM, hot-plugging it if it is not listed.
# Always returns 1.
sub qemu_findorcreatescsihw {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my (undef, $controller, $controller_prefix) = scsihw_infos($conf, $device);
    my $scsihwid = $controller_prefix . $controller;

    my $present = vm_devices_list($vmid);
    return 1 if defined($present->{$scsihwid});

    vm_deviceplug($storecfg, $conf, $vmid, $scsihwid, $device, $arch, $machine_type);

    return 1;
}
4347
# Unplug the SCSI controller belonging to the drive option $opt, unless
# another present SCSI drive is found by the index check below.
#
# With 'virtio-scsi-single' the controller "virtioscsi<index>" of the drive
# is always unplugged. Always returns 1.
sub qemu_deletescsihw {
    my ($conf, $vmid, $opt) = @_;

    my $device = parse_drive($opt, $conf->{$opt});

    my $single_controller = $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
    if ($single_controller) {
        vm_deviceunplug($vmid, $conf, "virtioscsi$device->{index}");
        return 1;
    }

    my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $device);
    my $index_limit = ($maxdev - 1) * ($controller + 1);

    my $present = vm_devices_list($vmid);
    for my $name (keys %{$present}) {
        next if !is_valid_drivename($name);

        my $drive = parse_drive($name, $conf->{$name});
        next if $drive->{interface} ne 'scsi';

        # a present SCSI drive below the limit keeps the controller alive
        return 1 if $drive->{index} < $index_limit;
    }

    vm_deviceunplug($vmid, $conf, "scsihw$controller");

    return 1;
}
4376
# Hot-plug the PCI bridge needed by a device, if there is one and it is not
# present yet.
#
# $device is the ID of the device that is about to be plugged. It is handed
# to print_pci_addr() together with an empty hash, which print_pci_addr()
# fills with the bridge(s) the device requires. Nothing is plugged when no
# bridge is reported or its number is below 1. Always returns 1.
sub qemu_add_pci_bridge {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my $bridges = {};
    print_pci_addr($device, $bridges, $arch, $machine_type);

    # same pick as iterating with each(): the last key in hash order
    my $bridgeid = (keys %$bridges)[-1];
    return 1 if !defined($bridgeid) || $bridgeid < 1;

    my $bridge = "pci.$bridgeid";
    my $devices_list = vm_devices_list($vmid);

    if (!defined($devices_list->{$bridge})) {
        # vm_deviceplug() expects a device description between the device ID
        # and $arch. A bridge has none, so pass an empty one to keep $arch
        # and $machine_type in their proper positions.
        vm_deviceplug($storecfg, $conf, $vmid, $bridge, {}, $arch, $machine_type);
    }

    return 1;
}
4400
# Issue the 'set_link' monitor command for the device named $device; the
# truthiness of $up is sent as a JSON boolean.
sub qemu_set_link_status {
    my ($vmid, $device, $up) = @_;

    my %link = (
        name => $device,
        up => $up ? JSON::true : JSON::false,
    );

    return mon_cmd($vmid, "set_link", %link);
}
4407
# Add the network backend for $deviceid to the running VM via 'netdev_add'.
#
# The backend string from print_netdev_full() is split on '=' and ',' into
# monitor options; 'vhost' is converted to a JSON boolean and 'queues' to a
# number before sending. Always returns 1.
sub qemu_netdevadd {
    my ($vmid, $conf, $arch, $device, $deviceid) = @_;

    my $netdev = print_netdev_full($vmid, $conf, $arch, $device, $deviceid, 1);
    my %options = split(/[=,]/, $netdev);

    # everything comes out of split() as a string, so fix up the typed options
    $options{vhost} = JSON::boolean(PVE::JSONSchema::parse_boolean($options{vhost}))
        if defined($options{vhost});
    $options{queues} = $options{queues} + 0
        if defined($options{queues});

    mon_cmd($vmid, "netdev_add", %options);

    return 1;
}
4425
# Issue the 'netdev_del' monitor command for the network backend with the
# ID $deviceid.
sub qemu_netdevdel {
    my ($vmid, $deviceid) = @_;

    my %backend = (id => $deviceid);

    return mon_cmd($vmid, "netdev_del", %backend);
}
4431
# Replace the USB device $deviceid of the running VM with the one described
# by $device.
#
# Does nothing if $device is false. Otherwise the old device is unplugged,
# an 'xhci' controller is added when $device requests usb3 and none is
# listed, and the new device is plugged.
sub qemu_usb_hotplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    return if !$device;

    # remove the old one first
    vm_deviceunplug($vmid, $conf, $deviceid);

    # check if xhci controller is necessary and available
    if ($device->{usb3}) {
        my $present = vm_devices_list($vmid);

        if (!$present->{xhci}) {
            my $pciaddr = print_pci_addr("xhci", undef, $arch, $machine_type);
            qemu_deviceadd($vmid, "nec-usb-xhci,id=xhci$pciaddr");
        }
    }

    my $usb_device = parse_usb_device($device->{host});
    $usb_device->{usb3} = $device->{usb3};

    # add the new one
    vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $usb_device, $arch, $machine_type);
}
4456
# Change the number of vCPUs of the running VM $vmid to $vcpus.
#
# A false $vcpus means "all CPUs" (sockets * cores from $conf). After every
# successful plug/unplug of a single CPU device, $conf->{vcpus} is updated
# and the config is written. Dies if more vCPUs than the maximum are
# requested, if the running VM does not match its configuration, or if
# unplugging is requested on a machine version older than 2.7. Raises a
# parameter exception for 'vcpus' when a single CPU does not appear or
# disappear in time.
sub qemu_cpu_hotplug {
    my ($vmid, $conf, $vcpus) = @_;

    my $machine_type = PVE::QemuServer::Machine::get_current_qemu_machine($vmid);

    # 'smp' is the old style socket count - no longer used, 'sockets' wins
    my $sockets = $conf->{sockets} || $conf->{smp} || 1;
    my $cores = $conf->{cores} || 1;
    my $maxcpus = $sockets * $cores;

    $vcpus = $maxcpus if !$vcpus;

    die "you can't add more vcpus than maxcpus\n"
        if $vcpus > $maxcpus;

    my $currentvcpus = $conf->{vcpus} || $maxcpus;

    if ($vcpus < $currentvcpus) {
        die "cpu hot-unplugging requires qemu version 2.7 or higher\n"
            if !PVE::QemuServer::Machine::machine_version($machine_type, 2, 7);

        # remove one CPU device at a time, highest index first
        for (my $i = $currentvcpus; $i > $vcpus; $i--) {
            qemu_devicedel($vmid, "cpu$i");

            my $retry = 0;
            my $running_cpus = mon_cmd($vmid, "query-cpus-fast");
            while (scalar(@{$running_cpus}) != $i - 1) {
                raise_param_exc({ vcpus => "error unplugging cpu$i" }) if $retry > 5;
                $retry++;
                sleep 1;
                $running_cpus = mon_cmd($vmid, "query-cpus-fast");
            }

            # update conf after each successful cpu unplug
            $conf->{vcpus} = scalar(@{$running_cpus});
            PVE::QemuConfig->write_config($vmid, $conf);
        }

        return;
    }

    my $cpus_before = mon_cmd($vmid, "query-cpus-fast");
    die "vcpus in running vm does not match its configuration\n"
        if scalar(@{$cpus_before}) != $currentvcpus;

    if (!PVE::QemuServer::Machine::machine_version($machine_type, 2, 7)) {
        # machine versions before 2.7 use the 'cpu-add' command instead of
        # CPU devices
        for (my $i = $currentvcpus; $i < $vcpus; $i++) {
            mon_cmd($vmid, "cpu-add", id => int($i));
        }
        return;
    }

    for (my $i = $currentvcpus + 1; $i <= $vcpus; $i++) {
        qemu_deviceadd($vmid, print_cpu_device($conf, $i));

        my $retry = 0;
        my $running_cpus = mon_cmd($vmid, "query-cpus-fast");
        while (scalar(@{$running_cpus}) != $i) {
            raise_param_exc({ vcpus => "error hotplugging cpu$i" }) if $retry > 10;
            $retry++;
            sleep 1;
            $running_cpus = mon_cmd($vmid, "query-cpus-fast");
        }

        # update conf after each successful cpu hotplug
        $conf->{vcpus} = scalar(@{$running_cpus});
        PVE::QemuConfig->write_config($vmid, $conf);
    }
}
4531
# Apply I/O throttling limits to the block device $deviceid of VM $vmid via
# the 'block_set_io_throttle' monitor command. Does nothing if the VM is not
# running.
#
# Positional arguments after $vmid and $deviceid, in this order:
#   bps, bps_rd, bps_wr, iops, iops_rd, iops_wr,
#   bps_max, bps_rd_max, bps_wr_max, iops_max, iops_rd_max, iops_wr_max,
#   bps_max_length, bps_rd_max_length, bps_wr_max_length,
#   iops_max_length, iops_rd_max_length, iops_wr_max_length
# Every value is sent as int().
sub qemu_block_set_io_throttle {
    my ($vmid, $deviceid, @limits) = @_;

    # monitor option names, in the order of the positional arguments
    my @limit_names = qw(
        bps bps_rd bps_wr iops iops_rd iops_wr
        bps_max bps_rd_max bps_wr_max iops_max iops_rd_max iops_wr_max
        bps_max_length bps_rd_max_length bps_wr_max_length
        iops_max_length iops_rd_max_length iops_wr_max_length
    );

    return if !check_running($vmid);

    my @throttle = map { ($limit_names[$_], int($limits[$_])) } 0 .. $#limit_names;

    return mon_cmd($vmid, "block_set_io_throttle", device => $deviceid, @throttle);
}
4563
# Resize the volume $volid to $size through the storage layer and, if the
# VM is running, tell QEMU about it with 'block_resize'.
#
# If the storage layer returns a false value, size 0 is sent to QEMU. The
# size sent is rounded up to the next multiple of 1024.
sub qemu_block_resize {
    my ($vmid, $deviceid, $storecfg, $volid, $size) = @_;

    my $running = check_running($vmid);

    my $resized = PVE::Storage::volume_resize($storecfg, $volid, $size, $running);
    $size = 0 if !$resized;

    return if !$running;

    # pad up to a multiple of 1024
    my $remainder = $size % 1024;
    $size += 1024 - $remainder if $remainder;

    my %request = (
        device => $deviceid,
        size => int($size),
        timeout => 60,
    );

    return mon_cmd($vmid, "block_resize", %request);
}
4584
# Create the snapshot $snap of the volume $volid.
#
# If the VM is running and do_snapshots_with_qemu() says so, the snapshot is
# taken by QEMU ('blockdev-snapshot-internal-sync'); in every other case the
# storage layer takes it.
sub qemu_volume_snapshot {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $running = check_running($vmid);
    my $via_qemu = $running && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    if (!$via_qemu) {
        PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
    } else {
        mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap);
    }
}
4596
# Delete the snapshot $snap of the volume $volid.
#
# The volume only counts as "running" if the VM is running AND one of the
# volumes in its current config refers to $volid. In that case, and if
# do_snapshots_with_qemu() says so, QEMU deletes the snapshot; in every
# other case the storage layer does, and is told the "running" state.
sub qemu_volume_snapshot_delete {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $running = check_running($vmid);

    if ($running) {
        my $conf = PVE::QemuConfig->load_config($vmid);

        my $referenced;
        PVE::QemuConfig->foreach_volume($conf, sub {
            my ($ds, $drive) = @_;
            $referenced = 1 if $drive->{file} eq $volid;
        });

        # a running VM that does not use the volume is treated as not running
        $running = $referenced;
    }

    my $via_qemu = $running && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    if (!$via_qemu) {
        PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $running);
    } else {
        mon_cmd($vmid, 'blockdev-snapshot-delete-internal-sync', device => $deviceid, name => $snap);
    }
}
4618
# Set the state of every migration capability the running QEMU reports.
#
# 'auto-converge' and 'xbzrle' are switched on; 'dirty-bitmaps' is switched
# on if 'query-proxmox-support' reports the matching property (the savevm
# variant when $savevm is true). All other reported capabilities are
# switched off.
sub set_migration_caps {
    my ($vmid, $savevm) = @_;

    # the query may fail, in that case dirty bitmaps simply stay disabled
    my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };

    my $bitmap_prop = 'pbs-dirty-bitmap-migration';
    $bitmap_prop = 'pbs-dirty-bitmap-savevm' if $savevm;

    my %wanted = (
        "auto-converge" => 1,
        "xbzrle" => 1,
        "x-rdma-pin-all" => 0,
        "zero-blocks" => 0,
        "compress" => 0,
        "dirty-bitmaps" => $qemu_support->{$bitmap_prop} ? 1 : 0,
    );

    my $supported = mon_cmd($vmid, "query-migrate-capabilities");

    my @capabilities = map {
        +{
            capability => $_->{capability},
            state => $wanted{$_->{capability}} ? JSON::true : JSON::false,
        }
    } @$supported;

    return mon_cmd($vmid, "migrate-set-capabilities", capabilities => \@capabilities);
}
4649
# Call $func once for every distinct volume ID referenced by $conf or by one
# of its snapshots, including unused volumes and the 'vmstate' key.
#
# $func is called as $func->($volid, $attr, @param), where $attr is a hash
# with the keys cdrom, replicate, shared, referenced_in_config,
# referenced_in_snapshot, size, is_vmstate, is_tpmstate, is_unused and
# drivename, merged over all places the volume is referenced from.
sub foreach_volid {
    my ($conf, $func, @param) = @_;

    my $volhash = {};

    my $collect = sub {
        my ($key, $drive, $snapname) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $attr = ($volhash->{$volid} //= {});

        # only a cdrom if every reference is a cdrom
        $attr->{cdrom} //= 1;
        $attr->{cdrom} = 0 if !drive_is_cdrom($drive);

        # replicated if any reference has replication on (the default)
        $attr->{replicate} //= 0;
        $attr->{replicate} = 1 if $drive->{replicate} // 1;

        $attr->{shared} //= 0;
        $attr->{shared} = 1 if $drive->{shared};

        $attr->{referenced_in_config} //= 0;
        if (defined($snapname)) {
            $attr->{referenced_in_snapshot}->{$snapname} = 1;
        } else {
            $attr->{referenced_in_config} = 1;
        }

        # the first size seen wins
        my $size = $drive->{size};
        $attr->{size} //= $size if $size;

        $attr->{is_vmstate} //= 0;
        $attr->{is_vmstate} = 1 if $key eq 'vmstate';

        $attr->{is_tpmstate} //= 0;
        $attr->{is_tpmstate} = 1 if $key eq 'tpmstate0';

        $attr->{is_unused} //= 0;
        $attr->{is_unused} = 1 if $key =~ /^unused\d+$/;

        $attr->{drivename} = $key if is_valid_drivename($key);
    };

    my $include_opts = {
        extra_keys => ['vmstate'],
        include_unused => 1,
    };

    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $collect);

    my $snapshots = $conf->{snapshots};
    for my $snapname (keys %{$snapshots}) {
        PVE::QemuConfig->foreach_volume_full(
            $snapshots->{$snapname}, $include_opts, $collect, $snapname);
    }

    for my $volid (keys %$volhash) {
        $func->($volid, $volhash->{$volid}, @param);
    }
}
4707
# Options which vmconfig_hotplug_pending() copies straight from the pending
# section into the config, without touching the running VM.
my $fast_plug_option = {
    map { $_ => 1 } qw(
        lock
        name
        onboot
        shares
        startup
        description
        protection
        vmstatestorage
        hookscript
        tags
    )
};
4720
# hotplug changes in [PENDING]
# $selection hash can be used to only apply specified options, for
# example: { cores => 1 } (only apply changed 'cores')
# $errors ref is used to return error messages
#
# Works in three steps, writing the config after the first and the last:
#   1. options listed in $fast_plug_option are committed unconditionally
#   2. pending deletions are applied to the running VM
#   3. pending additions/changes are applied to the running VM
# Options that cannot be hot-plugged are skipped silently and stay pending;
# real failures are stored in $errors->{$opt} and the option stays pending.
sub vmconfig_hotplug_pending {
    my ($vmid, $conf, $storecfg, $selection, $errors) = @_;

    my $defaults = load_defaults();
    my $arch = get_vm_arch($conf);
    my $machine_type = get_vm_machine($conf, undef, $arch);

    my $add_error = sub {
        my ($opt, $msg) = @_;
        $errors->{$opt} = "hotplug problem - $msg";
    };

    # Plug or unplug the tablet (and, on aarch64, the keyboard).
    # vm_deviceplug() takes a device description between the device ID and
    # $arch; tablet and keyboard have none, so an empty one is passed to keep
    # $arch and $machine_type in their proper positions.
    my $set_tablet = sub {
        my ($enable) = @_;
        if ($enable) {
            vm_deviceplug($storecfg, $conf, $vmid, 'tablet', {}, $arch, $machine_type);
            vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', {}, $arch, $machine_type)
                if $arch eq 'aarch64';
        } else {
            vm_deviceunplug($vmid, $conf, 'tablet');
            vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
        }
    };

    # commit values which do not have any impact on running VM first
    # Note: those options cannot raise errors, so we do not care about
    # $selection and always apply them.
    my $changes = 0;
    foreach my $opt (keys %{$conf->{pending}}) { # add/change
        if ($fast_plug_option->{$opt}) {
            $conf->{$opt} = $conf->{pending}->{$opt};
            delete $conf->{pending}->{$opt};
            $changes = 1;
        }
    }

    if ($changes) {
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');

    my $cgroup = PVE::QemuServer::CGroup->new($vmid);
    my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    foreach my $opt (sort keys %$pending_delete_hash) {
        next if $selection && !$selection->{$opt};
        my $force = $pending_delete_hash->{$opt}->{force};
        eval {
            if ($opt eq 'hotplug') {
                # 'hotplug' may be unset, do not match against undef
                die "skip\n" if (($conf->{hotplug} // '') =~ /memory/);
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                # deleting the option means falling back to the default
                $set_tablet->($defaults->{tablet});
            } elsif ($opt =~ m/^usb\d+/) {
                die "skip\n";
                # since we cannot reliably hot unplug usb devices we are disabling it
                #die "skip\n" if !$hotplug_features->{usb} || $conf->{$opt} =~ m/spice/i;
                #vm_deviceunplug($vmid, $conf, $opt);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, undef);
            } elsif ($opt eq 'balloon') {
                # enable balloon device is not hotpluggable
                die "skip\n" if defined($conf->{balloon}) && $conf->{balloon} == 0;
                # here we reset the ballooning value to memory
                my $balloon = $conf->{memory} || $defaults->{memory};
                mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
            } elsif ($fast_plug_option->{$opt}) {
                # do nothing
            } elsif ($opt =~ m/^net(\d+)$/) {
                die "skip\n" if !$hotplug_features->{network};
                vm_deviceunplug($vmid, $conf, $opt);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if !$hotplug_features->{disk} || $opt =~ m/(ide|sata)(\d+)/;
                vm_deviceunplug($vmid, $conf, $opt);
                vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
            } elsif ($opt =~ m/^memory$/) {
                die "skip\n" if !$hotplug_features->{memory};
                PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt);
            } elsif ($opt eq 'cpuunits') {
                $cgroup->change_cpu_shares(undef, 1024);
            } elsif ($opt eq 'cpulimit') {
                $cgroup->change_cpu_quota(undef, undef); # reset, cgroup module can better decide values
            } else {
                die "skip\n";
            }
        };
        if (my $err = $@) {
            $add_error->($opt, $err) if $err ne "skip\n";
        } else {
            delete $conf->{$opt};
            PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        }
    }

    # Commits all pending cloud-init options at once and regenerates the
    # cloud-init config, using $value for the drive option $key.
    my ($apply_pending_cloudinit, $apply_pending_cloudinit_done);
    $apply_pending_cloudinit = sub {
        return if $apply_pending_cloudinit_done; # once is enough
        $apply_pending_cloudinit_done = 1; # once is enough

        my ($key, $value) = @_;

        my @cloudinit_opts = keys %$confdesc_cloudinit;
        foreach my $opt (keys %{$conf->{pending}}) {
            next if !grep { $_ eq $opt } @cloudinit_opts;
            $conf->{$opt} = delete $conf->{pending}->{$opt};
        }

        my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
        foreach my $opt (sort keys %$pending_delete_hash) {
            next if !grep { $_ eq $opt } @cloudinit_opts;
            PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
            delete $conf->{$opt};
        }

        my $new_conf = { %$conf };
        $new_conf->{$key} = $value;
        PVE::QemuServer::Cloudinit::generate_cloudinitconfig($new_conf, $vmid);
    };

    foreach my $opt (keys %{$conf->{pending}}) {
        next if $selection && !$selection->{$opt};
        my $value = $conf->{pending}->{$opt};
        eval {
            if ($opt eq 'hotplug') {
                # neither enabling nor disabling memory hotplug can be done online
                die "skip\n" if ($value =~ /memory/)
                    || ($value !~ /memory/ && ($conf->{hotplug} // '') =~ /memory/);
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                if ($value == 1) {
                    $set_tablet->(1);
                } elsif ($value == 0) {
                    $set_tablet->(0);
                }
            } elsif ($opt =~ m/^usb\d+$/) {
                die "skip\n";
                # since we cannot reliably hot unplug usb devices we disable it for now
                #die "skip\n" if !$hotplug_features->{usb} || $value =~ m/spice/i;
                #my $d = eval { parse_property_string($usbdesc->{format}, $value) };
                #die "skip\n" if !$d;
                #qemu_usb_hotplug($storecfg, $conf, $vmid, $opt, $d, $arch, $machine_type);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, $value);
            } elsif ($opt eq 'balloon') {
                # enable/disable ballooning device is not hotpluggable
                my $old_balloon_enabled = !!(!defined($conf->{balloon}) || $conf->{balloon});
                my $new_balloon_enabled = !!(!defined($conf->{pending}->{balloon}) || $conf->{pending}->{balloon});
                die "skip\n" if $old_balloon_enabled != $new_balloon_enabled;

                # allow manual ballooning if shares is set to zero
                if ((defined($conf->{shares}) && ($conf->{shares} == 0))) {
                    my $balloon = $conf->{pending}->{balloon} || $conf->{memory} || $defaults->{memory};
                    mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
                }
            } elsif ($opt =~ m/^net(\d+)$/) {
                # some changes can be done without hotplug
                vmconfig_update_net($storecfg, $conf, $hotplug_features->{network},
                    $vmid, $opt, $value, $arch, $machine_type);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if $opt eq 'efidisk0' || $opt eq 'tpmstate0';
                # some changes can be done without hotplug
                my $drive = parse_drive($opt, $value);
                if (drive_is_cloudinit($drive)) {
                    $apply_pending_cloudinit->($opt, $value);
                }
                vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
                    $vmid, $opt, $value, $arch, $machine_type);
            } elsif ($opt =~ m/^memory$/) { #dimms
                die "skip\n" if !$hotplug_features->{memory};
                $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt, $value);
            } elsif ($opt eq 'cpuunits') {
                $cgroup->change_cpu_shares($conf->{pending}->{$opt}, 1024);
            } elsif ($opt eq 'cpulimit') {
                my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000);
                $cgroup->change_cpu_quota($cpulimit, 100000);
            } elsif ($opt eq 'agent') {
                vmconfig_update_agent($conf, $opt, $value);
            } else {
                die "skip\n"; # skip non-hot-pluggable options
            }
        };
        if (my $err = $@) {
            $add_error->($opt, $err) if $err ne "skip\n";
        } else {
            $conf->{$opt} = $value;
            delete $conf->{pending}->{$opt};
        }
    }

    PVE::QemuConfig->write_config($vmid, $conf);
}
4915
# Free the volume behind the drive option $key if that is allowed.
#
# Only acts if $force is set or $key starts with 'unused', and the drive is
# not a cdrom. If the VM owns the volume, the user needs
# 'Datastore.AllocateSpace' on its storage and the volume must not be in use
# anymore (dies otherwise); the volume is then freed.
#
# Returns 1 if the caller may drop the drive from the config (volume freed,
# or not owned by this VM), nothing otherwise.
sub try_deallocate_drive {
    my ($storecfg, $vmid, $conf, $key, $drive, $rpcenv, $authuser, $force) = @_;

    return if !($force || $key =~ /^unused/);
    return if drive_is_cdrom($drive, 1);

    my $volid = $drive->{file};

    # If vm is not owner of this disk remove from config
    return 1 if !vm_is_volid_owner($storecfg, $vmid, $volid);

    my $sid = PVE::Storage::parse_volume_id($volid);
    $rpcenv->check($authuser, "/storage/$sid", ['Datastore.AllocateSpace']);

    # check if the disk is really unused
    my $in_use = PVE::QemuServer::Drive::is_volume_in_use($storecfg, $conf, $key, $volid);
    die "unable to delete '$volid' - volume is still in use (snapshot?)\n" if $in_use;

    PVE::Storage::vdisk_free($storecfg, $volid);

    return 1;
}
4938
# Handle the removal of the drive option $opt from $conf.
#
# With $force the current user needs 'VM.Config.Disk' on the VM and the
# volume is deallocated via try_deallocate_drive(); without it the volume is
# registered as unused drive.
sub vmconfig_delete_or_detach_drive {
    my ($vmid, $storecfg, $conf, $opt, $force) = @_;

    my $drive = parse_drive($opt, $conf->{$opt});

    my $rpcenv = PVE::RPCEnvironment::get();
    my $authuser = $rpcenv->get_user();

    if (!$force) {
        # plain detach, keep the volume around
        vmconfig_register_unused_drive($storecfg, $vmid, $conf, $drive);
        return;
    }

    $rpcenv->check_vm_perm($authuser, $vmid, undef, ['VM.Config.Disk']);
    try_deallocate_drive($storecfg, $vmid, $conf, $opt, $drive, $rpcenv, $authuser, $force);
}
4954
4955
4956
# Apply all pending changes of $conf to the config itself (cold plug) and
# write the config once at the end.
#
# Pending deletions are handled first, then pending additions/changes.
# Drives that get deleted or replaced go through
# vmconfig_delete_or_detach_drive() or vmconfig_register_unused_drive().
# A failure for one option is stored in $errors->{$opt}, warned about, and
# leaves that option pending.
sub vmconfig_apply_pending {
    my ($vmid, $conf, $storecfg, $errors) = @_;

    my $add_apply_error = sub {
        my ($opt, $msg) = @_;
        my $err_msg = "unable to apply pending change $opt : $msg";
        $errors->{$opt} = $err_msg;
        warn $err_msg;
    };

    # cold plug

    my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    for my $opt (sort keys %$pending_delete_hash) {
        my $force = $pending_delete_hash->{$opt}->{force};

        eval {
            die "internal error" if $opt =~ m/^unused/;

            vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force)
                if defined($conf->{$opt}) && is_valid_drivename($opt);
        };
        if (my $err = $@) {
            $add_apply_error->($opt, $err);
            next;
        }

        PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        delete $conf->{$opt};
    }

    PVE::QemuConfig->cleanup_pending($conf);

    for my $opt (keys %{$conf->{pending}}) { # add/change
        next if $opt eq 'delete'; # just to be sure

        eval {
            # a drive that gets replaced keeps its old volume as unused drive
            if (defined($conf->{$opt}) && is_valid_drivename($opt)) {
                my $old_drive = parse_drive($opt, $conf->{$opt});
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
            }
        };
        if (my $err = $@) {
            $add_apply_error->($opt, $err);
            next;
        }

        $conf->{$opt} = delete $conf->{pending}->{$opt};
    }

    # write all changes at once to avoid unnecessary i/o
    PVE::QemuConfig->write_config($vmid, $conf);
}
5006
# Apply a new or changed network device config ($value) for $opt.
#
# Returns 1 if an already configured NIC could be updated in place. Otherwise
# the device is (re-)plugged through vm_deviceplug(); the function dies with
# "skip\n" when that would be needed but $hotplug is not set.
sub vmconfig_update_net {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $newnet = parse_net($value);

    if ($conf->{$opt}) {
        my $oldnet = parse_net($conf->{$opt});

        # a change of any of these means the device itself has to be replaced
        my $need_replug =
            safe_string_ne($oldnet->{model}, $newnet->{model})
            || safe_string_ne($oldnet->{macaddr}, $newnet->{macaddr})
            || safe_num_ne($oldnet->{queues}, $newnet->{queues})
            || !($newnet->{bridge} && $oldnet->{bridge}); # bridge/nat mode change

        if (!$need_replug) {
            die "internal error" if $opt !~ m/net(\d+)/;
            my $iface = "tap${vmid}i$1";

            my $replug_tap =
                safe_string_ne($oldnet->{bridge}, $newnet->{bridge})
                || safe_num_ne($oldnet->{tag}, $newnet->{tag})
                || safe_string_ne($oldnet->{trunks}, $newnet->{trunks})
                || safe_num_ne($oldnet->{firewall}, $newnet->{firewall});

            if ($replug_tap) {
                PVE::Network::tap_unplug($iface);

                my @plug_args = (
                    $iface,
                    $newnet->{bridge},
                    $newnet->{tag},
                    $newnet->{firewall},
                    $newnet->{trunks},
                    $newnet->{rate},
                );
                if ($have_sdn) {
                    PVE::Network::SDN::Zones::tap_plug(@plug_args);
                } else {
                    PVE::Network::tap_plug(@plug_args);
                }
            } elsif (safe_num_ne($oldnet->{rate}, $newnet->{rate})) {
                # Rate can be applied on its own but any change above needs to
                # include the rate in tap_plug since OVS resets everything.
                PVE::Network::tap_rate_limit($iface, $newnet->{rate});
            }

            qemu_set_link_status($vmid, $opt, !$newnet->{link_down})
                if safe_string_ne($oldnet->{link_down}, $newnet->{link_down});

            return 1;
        }

        # for non online change, we try to hot-unplug
        die "skip\n" if !$hotplug;
        vm_deviceunplug($vmid, $conf, $opt);
    }

    die "skip\n" if !$hotplug;
    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet, $arch, $machine_type);
}
5059
# Check whether a changed guest agent config ($value) needs no cold plug.
#
# Dies with "skip\n" if $opt is not set in $conf, or if any property outside of
# the hotpluggable set differs between the current and the new agent config.
# Returns nothing otherwise.
sub vmconfig_update_agent {
    my ($conf, $opt, $value) = @_;

    die "skip\n" if !$conf->{$opt};

    my $hotplug_options = { fstrim_cloned_disks => 1 };

    my $old_agent = parse_guest_agent($conf);
    my $agent = parse_guest_agent({$opt => $value});

    # first pass catches added/changed options, second pass removed ones
    for my $pair ([$agent, $old_agent], [$old_agent, $agent]) {
        my ($from, $to) = @$pair;
        for my $option (keys %$from) {
            next if defined($hotplug_options->{$option});
            die "skip\n" if safe_string_ne($from->{$option}, $to->{$option});
        }
    }

    return; # either no actual change (e.g., format string reordered) or just hotpluggable changes
}
5082
# Apply a new or changed drive config ($value) for $opt.
#
# For an already configured drive:
#   - CD-ROM: the medium is ejected and, unless the new file is 'none', the new
#     medium is inserted; returns 1.
#   - disk, same volume: dies with "skip\n" if a non-hotpluggable property
#     changed, otherwise updates the I/O throttle if needed; returns 1.
#   - disk, other volume: the old one is unplugged and registered as unused,
#     then the new one is plugged like a new disk.
# New disks are hotplugged; dies with "skip\n" if $hotplug is not set or $opt
# matches ide/sata.
sub vmconfig_update_disk {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $drive = parse_drive($opt, $value);

    my $old_drive = $conf->{$opt} ? parse_drive($opt, $conf->{$opt}) : undef;

    if ($old_drive) {
        my $media = $drive->{media} || 'disk';
        my $oldmedia = $old_drive->{media} || 'disk';
        die "unable to change media type\n" if $media ne $oldmedia;

        if (drive_is_cdrom($old_drive)) {
            if ($drive->{file} eq 'none') {
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive)
                    if drive_is_cloudinit($old_drive);
            } else {
                my $path = get_iso_path($storecfg, $vmid, $drive->{file});

                # force eject if locked
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");

                mon_cmd($vmid, "blockdev-change-medium", id => "$opt", filename => "$path")
                    if $path;
            }

            return 1;
        }

        if ($drive->{file} eq $old_drive->{file}) {
            # update existing disk

            # skip non hotpluggable value
            for my $prop (qw(discard iothread queues cache ssd)) {
                die "skip\n" if safe_string_ne($drive->{$prop}, $old_drive->{$prop});
            }

            # Throttle properties in the argument order expected by
            # qemu_block_set_io_throttle: [ property, scale factor, fallback ].
            my $mib = 1024 * 1024;
            my @throttle_spec = (
                ['mbps', $mib, 0],
                ['mbps_rd', $mib, 0],
                ['mbps_wr', $mib, 0],
                ['iops', undef, 0],
                ['iops_rd', undef, 0],
                ['iops_wr', undef, 0],
                ['mbps_max', $mib, 0],
                ['mbps_rd_max', $mib, 0],
                ['mbps_wr_max', $mib, 0],
                ['iops_max', undef, 0],
                ['iops_rd_max', undef, 0],
                ['iops_wr_max', undef, 0],
                ['bps_max_length', undef, 1],
                ['bps_rd_max_length', undef, 1],
                ['bps_wr_max_length', undef, 1],
                ['iops_max_length', undef, 1],
                ['iops_rd_max_length', undef, 1],
                ['iops_wr_max_length', undef, 1],
            );

            my $throttle_changed = 0;
            for my $spec (@throttle_spec) {
                my $prop = $spec->[0];
                next if !safe_num_ne($drive->{$prop}, $old_drive->{$prop});
                $throttle_changed = 1;
                last;
            }

            # apply throttle
            if ($throttle_changed) {
                my @limits = map {
                    my ($prop, $scale, $fallback) = @$_;
                    my $limit = $drive->{$prop} || $fallback;
                    defined($scale) ? $limit * $scale : $limit;
                } @throttle_spec;

                qemu_block_set_io_throttle($vmid, "drive-$opt", @limits);
            }

            return 1;
        }

        # a different volume was configured
        die "skip\n" if !$hotplug;

        # unplug and register as unused
        vm_deviceunplug($vmid, $conf, $opt);
        vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
    }

    die "skip\n" if !$hotplug || $opt =~ m/(ide|sata)(\d+)/;
    # hotplug new disks
    PVE::Storage::activate_volumes($storecfg, [$drive->{file}]) if $drive->{file} !~ m|^/dev/.+|;
    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive, $arch, $machine_type);
}
5190
# called in locked context by incoming migration
#
# Collects the non-CD-ROM volumes of $conf that are not on shared storage.
# Returns a hash reference mapping the drive key to
#   [$volid, $storeid, $volname, $drive, $use_existing]
# where $use_existing is 1 for volumes listed in $replicated_volumes.
sub vm_migrate_get_nbd_disks {
    my ($storecfg, $conf, $replicated_volumes) = @_;

    my $local_volumes = {};

    my $collect = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid);

        # skip volumes on shared storage
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        return if $scfg->{shared};

        # replicated disks re-use existing state via bitmap
        my $use_existing = $replicated_volumes->{$volid} ? 1 : 0;
        $local_volumes->{$ds} = [$volid, $storeid, $volname, $drive, $use_existing];
    };

    PVE::QemuConfig->foreach_volume($conf, $collect);

    return $local_volumes;
}
5216
# called in locked context by incoming migration
#
# Allocates a target volume for every entry of $source_volumes (as returned by
# vm_migrate_get_nbd_disks), except for entries flagged to re-use an existing
# volume. Returns a hash reference, keyed by drive key, with 'drivestr' and
# 'volid' (plus 'replicated' => 1 for re-used volumes).
sub vm_migrate_alloc_nbd_disks {
    my ($storecfg, $vmid, $source_volumes, $storagemap) = @_;

    my $nbd = {};

    for my $opt (sort keys %$source_volumes) {
        my ($volid, $storeid, $volname, $drive, $use_existing) = $source_volumes->{$opt}->@*;

        if ($use_existing) {
            my $drivestr = print_drive($drive);
            $nbd->{$opt} = {
                drivestr => $drivestr,
                volid => $volid,
                replicated => 1,
            };
            next;
        }

        # If a remote storage is specified and the format of the original
        # volume is not available there, fall back to the default format.
        # Otherwise use the same format as the original.
        my $format;
        if ($storagemap->{identity}) {
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $format = qemu_img_format($scfg, $volname);
        } else {
            $storeid = map_storage($storagemap, $storeid);
            my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            my $fileFormat = qemu_img_format($scfg, $volname);
            my $supported = grep { $fileFormat eq $_ } @$validFormats;
            $format = $supported ? $fileFormat : $defFormat;
        }

        my $size = $drive->{size} / 1024;
        my $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $format, undef, $size);

        # note: this updates the drive hash referenced by $source_volumes in place
        $drive->{format} = $format;
        $drive->{file} = $newvolid;

        my $drivestr = print_drive($drive);
        $nbd->{$opt}->{drivestr} = $drivestr;
        $nbd->{$opt}->{volid} = $newvolid;
    }

    return $nbd;
}
5260
# Start a VM while holding its config lock.
#
# see vm_start_nolock for parameters, additionally:
# migrate_opts:
#   storagemap => parsed storage map for allocating NBD disks
#
# If the config is locked for backup and the VM process already runs, the guest
# is resumed in the existing QEMU instance instead.
sub vm_start {
    my ($storecfg, $vmid, $params, $migrate_opts) = @_;

    my $start_locked = sub {
        my $migratedfrom = $migrate_opts->{migratedfrom};
        my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);

        if (!$params->{skiptemplate} && PVE::QemuConfig->is_template($conf)) {
            die "you can't start a vm if it's a template\n";
        }

        my $has_suspended_lock = PVE::QemuConfig->has_lock($conf, 'suspended');
        my $has_backup_lock = PVE::QemuConfig->has_lock($conf, 'backup');

        my $running = check_running($vmid, undef, $migratedfrom);

        # a backup is currently running, attempt to start the guest in the
        # existing QEMU instance
        return vm_resume($vmid) if $has_backup_lock && $running;

        if (!$params->{skiplock} && !$has_suspended_lock) {
            PVE::QemuConfig->check_lock($conf);
        }

        $params->{resume} = $has_suspended_lock || defined($conf->{vmstate});

        die "VM $vmid already running\n" if $running;

        my $storagemap = $migrate_opts->{storagemap};
        if ($storagemap) {
            my $replicated = $migrate_opts->{replicated_volumes};
            my $disks = vm_migrate_get_nbd_disks($storecfg, $conf, $replicated);
            my $nbd = vm_migrate_alloc_nbd_disks($storecfg, $vmid, $disks, $storagemap);
            $migrate_opts->{nbd} = $nbd;

            # start with the drive strings of the NBD target volumes
            for my $opt (keys %$nbd) {
                $conf->{$opt} = $nbd->{$opt}->{drivestr};
            }
        }

        return vm_start_nolock($storecfg, $vmid, $conf, $params, $migrate_opts);
    };

    return PVE::QemuConfig->lock_config($vmid, $start_locked);
}
5304
5305
# Start a VM without taking the config lock (vm_start is the locked wrapper).
#
# params:
#   statefile => 'tcp', 'unix' for migration or path/volid for RAM state
#   skiplock => 0/1, skip checking for config lock
#   skiptemplate => 0/1, skip checking whether VM is template
#   forcemachine => to force Qemu machine (rollback/migration)
#   forcecpu => a QEMU '-cpu' argument string to override get_cpu_options
#   timeout => in seconds
#   paused => start VM in paused state (backup)
#   resume => resume from hibernation
#   pbs-backing => {
#       sata0 => {
#           repository
#           snapshot
#           keyfile
#           archive
#       },
#       virtio2 => ...
#   }
# migrate_opts:
#   nbd => volumes for NBD exports (vm_migrate_alloc_nbd_disks)
#   migratedfrom => source node
#   spice_ticket => used for spice migration, passed via tunnel/stdin
#   network => CIDR of migration network
#   type => secure/insecure - tunnel over encrypted connection or plain-text
#   nbd_proto_version => int, 0 for TCP, 1 for UNIX
#   replicated_volumes => which volids should be re-used with bitmaps for nbd migration
#
# Returns a hash reference which may contain 'migrate_uri', 'migrate_storage_uri',
# 'drives' (NBD export info per drive) and 'spice_port'.
# Dies with "start failed: ..." if the QEMU process could not be started.
sub vm_start_nolock {
    my ($storecfg, $vmid, $conf, $params, $migrate_opts) = @_;

    my $statefile = $params->{statefile};
    my $resume = $params->{resume};

    my $migratedfrom = $migrate_opts->{migratedfrom};
    my $migration_type = $migrate_opts->{type};

    my $res = {};

    # clean up leftover reboot request files
    eval { clear_reboot_request($vmid); };
    warn $@ if $@;

    if (!$statefile && scalar(keys %{$conf->{pending}})) {
        vmconfig_apply_pending($vmid, $conf, $storecfg);
        $conf = PVE::QemuConfig->load_config($vmid); # update/reload
    }

    # don't regenerate the ISO if the VM is started as part of a live migration
    # this way we can reuse the old ISO with the correct config
    PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid) if !$migratedfrom;

    my $defaults = load_defaults();

    # set environment variable useful inside network script
    $ENV{PVE_MIGRATED_FROM} = $migratedfrom if $migratedfrom;

    PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1);

    my $forcemachine = $params->{forcemachine};
    my $forcecpu = $params->{forcecpu};
    if ($resume) {
        # enforce machine and CPU type on suspended vm to ensure HW compatibility
        $forcemachine = $conf->{runningmachine};
        $forcecpu = $conf->{runningcpu};
        print "Resuming suspended VM\n";
    }

    my ($cmd, $vollist, $spice_port) = config_to_command($storecfg, $vmid,
        $conf, $defaults, $forcemachine, $forcecpu, $params->{'pbs-backing'});

    # Lazily determine (and cache) the IP used for migration traffic: taken from
    # the migration network if one is configured, else the node's cluster IP.
    my $migration_ip;
    my $get_migration_ip = sub {
        my ($nodename) = @_;

        return $migration_ip if defined($migration_ip);

        my $cidr = $migrate_opts->{network};

        if (!defined($cidr)) {
            my $dc_conf = PVE::Cluster::cfs_read_file('datacenter.cfg');
            $cidr = $dc_conf->{migration}->{network};
        }

        if (defined($cidr)) {
            my $ips = PVE::Network::get_local_ip_from_cidr($cidr);

            die "could not get IP: no address configured on local " .
                "node for network '$cidr'\n" if scalar(@$ips) == 0;

            die "could not get IP: multiple addresses configured on local " .
                "node for network '$cidr'\n" if scalar(@$ips) > 1;

            # exactly one address is left at this point
            $migration_ip = $ips->[0];
        }

        $migration_ip = PVE::Cluster::remote_node_ip($nodename, 1)
            if !defined($migration_ip);

        return $migration_ip;
    };

    my $migrate_uri;
    if ($statefile) {
        if ($statefile eq 'tcp') {
            my $localip = "localhost";
            my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg');
            my $nodename = nodename();

            if (!defined($migration_type)) {
                if (defined($datacenterconf->{migration}->{type})) {
                    $migration_type = $datacenterconf->{migration}->{type};
                } else {
                    $migration_type = 'secure';
                }
            }

            if ($migration_type eq 'insecure') {
                $localip = $get_migration_ip->($nodename);
                $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
            }

            my $pfamily = PVE::Tools::get_host_address_family($nodename);
            my $migrate_port = PVE::Tools::next_migrate_port($pfamily);
            $migrate_uri = "tcp:${localip}:${migrate_port}";
            push @$cmd, '-incoming', $migrate_uri;
            push @$cmd, '-S';

        } elsif ($statefile eq 'unix') {
            # should be default for secure migrations as a ssh TCP forward
            # tunnel is not deterministically ready in time and regularly
            # fails to set up, so use UNIX socket forwards
            my $socket_addr = "/run/qemu-server/$vmid.migrate";
            unlink $socket_addr;

            $migrate_uri = "unix:$socket_addr";

            push @$cmd, '-incoming', $migrate_uri;
            push @$cmd, '-S';

        } elsif (-e $statefile) {
            push @$cmd, '-loadstate', $statefile;
        } else {
            my $statepath = PVE::Storage::path($storecfg, $statefile);
            push @$vollist, $statefile;
            push @$cmd, '-loadstate', $statepath;
        }
    } elsif ($params->{paused}) {
        push @$cmd, '-S';
    }

    my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume);

    my $pci_devices = {}; # host pci devices
    for (my $i = 0; $i < $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES; $i++) {
        my $dev = $conf->{"hostpci$i"} or next;
        $pci_devices->{$i} = parse_hostpci($dev);
    }

    my $pci_id_list = [ map { $_->{id} } map { $_->{pciid}->@* } values $pci_devices->%* ];
    # reserve all PCI IDs before actually doing anything with them
    PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, $start_timeout);

    eval {
        for my $id (sort keys %$pci_devices) {
            my $d = $pci_devices->{$id};
            for my $dev ($d->{pciid}->@*) {
                PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $id, $d->{mdev});
            }
        }
    };
    if (my $err = $@) {
        eval { PVE::QemuServer::PCI::remove_pci_reservation($pci_id_list) };
        warn $@ if $@;
        die $err;
    }

    PVE::Storage::activate_volumes($storecfg, $vollist);

    eval {
        run_command(['/bin/systemctl', 'stop', "$vmid.scope"], outfunc => sub{}, errfunc => sub{});
    };
    # Issues with the above 'stop' not being fully completed are extremely rare, a very low
    # timeout should be more than enough here...
    PVE::Systemd::wait_for_unit_removed("$vmid.scope", 5);

    my $cpuunits = get_cpuunits($conf);

    my %run_params = (
        timeout => $statefile ? undef : $start_timeout,
        umask => 0077,
        noerr => 1,
    );

    # when migrating, prefix QEMU output so other side can pick up any
    # errors that might occur and show the user
    if ($migratedfrom) {
        $run_params{quiet} = 1;
        $run_params{logfunc} = sub { print "QEMU: $_[0]\n" };
    }

    my %systemd_properties = (
        Slice => 'qemu.slice',
        KillMode => 'process',
        SendSIGKILL => 0,
        TimeoutStopUSec => ULONG_MAX, # infinity
    );

    if (PVE::CGroup::cgroup_mode() == 2) {
        $cpuunits = 10000 if $cpuunits >= 10000; # else we get an error
        $systemd_properties{CPUWeight} = $cpuunits;
    } else {
        $systemd_properties{CPUShares} = $cpuunits;
    }

    if (my $cpulimit = $conf->{cpulimit}) {
        $systemd_properties{CPUQuota} = int($cpulimit * 100);
    }
    $systemd_properties{timeout} = 10 if $statefile; # setting up the scope should be quick

    my $run_qemu = sub {
        PVE::Tools::run_fork sub {
            PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);

            my $tpmpid;
            if (my $tpm = $conf->{tpmstate0}) {
                # start the TPM emulator so QEMU can connect on start
                $tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom);
            }

            my $exitcode = run_command($cmd, %run_params);
            if ($exitcode) {
                if ($tpmpid) {
                    warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n";
                    kill 'TERM', $tpmpid;
                }
                die "QEMU exited with code $exitcode\n";
            }
        };
    };

    if ($conf->{hugepages}) {

        my $code = sub {
            my $hugepages_topology = PVE::QemuServer::Memory::hugepages_topology($conf);
            my $hugepages_host_topology = PVE::QemuServer::Memory::hugepages_host_topology();

            PVE::QemuServer::Memory::hugepages_mount();
            PVE::QemuServer::Memory::hugepages_allocate($hugepages_topology, $hugepages_host_topology);

            eval { $run_qemu->() };
            if (my $err = $@) {
                PVE::QemuServer::Memory::hugepages_reset($hugepages_host_topology)
                    if !$conf->{keephugepages};
                die $err;
            }

            PVE::QemuServer::Memory::hugepages_pre_deallocate($hugepages_topology)
                if !$conf->{keephugepages};
        };
        eval { PVE::QemuServer::Memory::hugepages_update_locked($code); };

    } else {
        eval { $run_qemu->() };
    }

    # $@ still holds the result of whichever eval ran last in the branch above
    if (my $err = $@) {
        # deactivate volumes if start fails; cleanup is best effort, but report
        # problems like the PCI cleanup further up does
        eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
        warn $@ if $@;
        eval { PVE::QemuServer::PCI::remove_pci_reservation($pci_id_list) };
        warn $@ if $@;

        die "start failed: $err";
    }

    # re-reserve all PCI IDs now that we can know the actual VM PID
    my $pid = PVE::QemuServer::Helpers::vm_running_locally($vmid);
    eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, undef, $pid) };
    warn $@ if $@;

    print "migration listens on $migrate_uri\n" if $migrate_uri;
    $res->{migrate_uri} = $migrate_uri;

    if ($statefile && $statefile ne 'tcp' && $statefile ne 'unix') {
        eval { mon_cmd($vmid, "cont"); };
        warn $@ if $@;
    }

    # start nbd server for storage migration
    if (my $nbd = $migrate_opts->{nbd}) {
        my $nbd_protocol_version = $migrate_opts->{nbd_proto_version} // 0;

        my $migrate_storage_uri;
        # nbd_protocol_version > 0 for unix socket support
        # ($migration_type is only defaulted for 'tcp' state files above, so it
        # can still be undefined here)
        if ($nbd_protocol_version > 0 && ($migration_type // '') eq 'secure') {
            my $socket_path = "/run/qemu-server/$vmid\_nbd.migrate";
            mon_cmd($vmid, "nbd-server-start", addr => { type => 'unix', data => { path => $socket_path } } );
            $migrate_storage_uri = "nbd:unix:$socket_path";
        } else {
            my $nodename = nodename();
            my $localip = $get_migration_ip->($nodename);
            my $pfamily = PVE::Tools::get_host_address_family($nodename);
            my $storage_migrate_port = PVE::Tools::next_migrate_port($pfamily);

            mon_cmd($vmid, "nbd-server-start", addr => {
                type => 'inet',
                data => {
                    host => "${localip}",
                    port => "${storage_migrate_port}",
                },
            });
            $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
            $migrate_storage_uri = "nbd:${localip}:${storage_migrate_port}";
        }

        $res->{migrate_storage_uri} = $migrate_storage_uri;

        foreach my $opt (sort keys %$nbd) {
            my $drivestr = $nbd->{$opt}->{drivestr};
            my $volid = $nbd->{$opt}->{volid};
            mon_cmd($vmid, "nbd-server-add", device => "drive-$opt", writable => JSON::true );
            my $nbd_uri = "$migrate_storage_uri:exportname=drive-$opt";
            print "storage migration listens on $nbd_uri volume:$drivestr\n";
            print "re-using replicated volume: $opt - $volid\n"
                if $nbd->{$opt}->{replicated};

            $res->{drives}->{$opt} = $nbd->{$opt};
            $res->{drives}->{$opt}->{nbd_uri} = $nbd_uri;
        }
    }

    if ($migratedfrom) {
        eval {
            set_migration_caps($vmid);
        };
        warn $@ if $@;

        if ($spice_port) {
            print "spice listens on port $spice_port\n";
            $res->{spice_port} = $spice_port;
            if ($migrate_opts->{spice_ticket}) {
                mon_cmd($vmid, "set_password", protocol => 'spice', password =>
                    $migrate_opts->{spice_ticket});
                mon_cmd($vmid, "expire_password", protocol => 'spice', time => "+30");
            }
        }

    } else {
        mon_cmd($vmid, "balloon", value => $conf->{balloon}*1024*1024)
            if !$statefile && $conf->{balloon};

        foreach my $opt (keys %$conf) {
            next if $opt !~ m/^net\d+$/;
            my $nicconf = parse_net($conf->{$opt});
            qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down};
        }
    }

    mon_cmd($vmid, 'qom-set',
        path => "machine/peripheral/balloon0",
        property => "guest-stats-polling-interval",
        value => 2) if (!defined($conf->{balloon}) || $conf->{balloon});

    if ($resume) {
        print "Resumed VM, removing state\n";
        if (my $vmstate = $conf->{vmstate}) {
            PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
            PVE::Storage::vdisk_free($storecfg, $vmstate);
        }
        delete $conf->@{qw(lock vmstate runningmachine runningcpu)};
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-start');

    return $res;
}
5680
# Return the QEMU command line of a VM as a single string.
#
# With $snapname the config of that snapshot is used instead of the current
# one; dies if the snapshot does not exist.
sub vm_commandline {
    my ($storecfg, $vmid, $snapname) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    my $forcemachine;
    my $forcecpu;

    if ($snapname) {
        my $snapshot = $conf->{snapshots}->{$snapname};
        if (!defined($snapshot)) {
            die "snapshot '$snapname' does not exist\n";
        }

        # check for machine or CPU overrides in snapshot
        ($forcemachine, $forcecpu) = $snapshot->@{qw(runningmachine runningcpu)};

        # keep file digest for API
        $snapshot->{digest} = $conf->{digest};

        $conf = $snapshot;
    }

    my $defaults = load_defaults();

    my $cmd = config_to_command($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu);

    return PVE::Tools::cmd2string($cmd);
}
5706
# Send a 'system_reset' monitor command to the VM while holding its config lock.
# The config lock check is skipped if $skiplock is set.
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $reset_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid);

        if (!$skiplock) {
            PVE::QemuConfig->check_lock($conf);
        }

        mon_cmd($vmid, "system_reset");
    };

    PVE::QemuConfig->lock_config($vmid, $reset_locked);
}
5719
# Return an array reference with all volume IDs referenced by $conf.
# Entries starting with '/' and entries that do not parse as a volume ID are
# left out.
sub get_vm_volumes {
    my ($conf) = @_;

    my @volids;

    my $collect = sub {
        my ($volid, $attr) = @_;

        # absolute paths are not storage volumes
        return if $volid =~ m|^/|;

        my ($sid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        push @volids, $volid;
    };

    foreach_volid($conf, $collect);

    return \@volids;
}
5737
# Clean up after a VM was stopped. Errors are only warned about, never raised.
#
# Unless $keepActive is set, the VM's volumes are deactivated and the TPM state
# volume is unmapped. Run-time files, the ivshmem file and host PCI/mdev state
# are removed, and pending config changes are applied if
# $apply_pending_changes is set.
sub vm_stop_cleanup {
    my ($storecfg, $vmid, $conf, $keepActive, $apply_pending_changes) = @_;

    my $cleanup = sub {
        if (!$keepActive) {
            PVE::Storage::deactivate_volumes($storecfg, get_vm_volumes($conf));

            if (my $tpmdrive = $conf->{tpmstate0}) {
                my $tpm = parse_drive("tpmstate0", $tpmdrive);
                my ($storeid, $volname) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
                PVE::Storage::unmap_volume($storecfg, $tpm->{file}) if $storeid;
            }
        }

        unlink map { "/var/run/qemu-server/${vmid}.$_" } qw(mon qmp pid vnc qga);

        if ($conf->{ivshmem}) {
            my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
            # Just delete it for now: VMs that already have it open are not
            # affected, but new VMs will get a separate one. If this becomes
            # an issue we either add some sort of ref-counting or just add a
            # "don't delete on stop" flag to the ivshmem format.
            unlink '/dev/shm/pve-shm-' . ($ivshmem->{name} // $vmid);
        }

        my $ids = [];
        for my $key (keys %$conf) {
            my ($hostpciindex) = $key =~ m/^hostpci(\d+)$/ or next;

            my $d = parse_hostpci($conf->{$key});
            my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $hostpciindex);

            for my $pci ($d->{pciid}->@*) {
                push @$ids, $pci->{id};
                PVE::SysFSTools::pci_cleanup_mdev_device($pci->{id}, $uuid);
            }
        }
        PVE::QemuServer::PCI::remove_pci_reservation($ids);

        vmconfig_apply_pending($vmid, $conf, $storecfg) if $apply_pending_changes;
    };

    eval { $cleanup->() };
    warn $@ if $@; # avoid errors - just warn
}
5788
# call only in locked context
#
# Stop (or, with $shutdown, shut down) a running VM and clean up afterwards.
#
# Parameters:
#   $skiplock   - skip the config lock check
#   $nocheck    - do not load the config (no hookscript, no cleanup)
#   $timeout    - seconds to wait for the VM to exit; defaults to the 'down'
#                 value of the startup option on shutdown, else 60
#   $shutdown   - request a guest shutdown/powerdown instead of 'quit'
#   $force      - terminate the process with SIGTERM (and SIGKILL after 10
#                 more seconds) if the VM does not exit
#   $keepActive - passed on to vm_stop_cleanup
#
# Returns early if the VM is not running. Dies if the stop request fails or
# times out and $force is not set.
sub _do_vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive) = @_;

    my $pid = check_running($vmid, $nocheck);
    return if !$pid;

    my $conf;
    if (!$nocheck) {
        $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->check_lock($conf) if !$skiplock;
        if (!defined($timeout) && $shutdown && $conf->{startup}) {
            my $opts = PVE::JSONSchema::pve_parse_startup_order($conf->{startup});
            $timeout = $opts->{down} if $opts->{down};
        }
        PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-stop');
    }

    eval {
        if ($shutdown) {
            if (defined($conf) && get_qga_key($conf, 'enabled')) {
                mon_cmd($vmid, "guest-shutdown", timeout => $timeout);
            } else {
                mon_cmd($vmid, "system_powerdown");
            }
        } else {
            mon_cmd($vmid, "quit");
        }
    };
    my $err = $@;

    # Poll once per second for at most $seconds. The final state is checked
    # again after the last sleep, so that a VM which exits during the last
    # second is not mistaken for one that is still running.
    my $still_running_after = sub {
        my ($seconds) = @_;

        for (my $waited = 0; $waited < $seconds; $waited++) {
            return 0 if !check_running($vmid, $nocheck);
            sleep 1;
        }

        return check_running($vmid, $nocheck) ? 1 : 0;
    };

    if (!$err) {
        $timeout = 60 if !defined($timeout);

        if (!$still_running_after->($timeout)) {
            vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
            return;
        }

        if ($force) {
            warn "VM still running - terminating now with SIGTERM\n";
            kill 15, $pid;
        } else {
            die "VM quit/powerdown failed - got timeout\n";
        }
    } else {
        if (!check_running($vmid, $nocheck)) {
            warn "Unexpected: VM shutdown command failed, but VM not running anymore..\n";
            return;
        }
        if ($force) {
            warn "VM quit/powerdown failed - terminating now with SIGTERM\n";
            kill 15, $pid;
        } else {
            die "VM quit/powerdown failed\n";
        }
    }

    # wait again, this time for the SIGTERM to take effect
    if ($still_running_after->(10)) {
        warn "VM still running - terminating now with SIGKILL\n";
        kill 9, $pid;
        sleep 1;
    }

    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
}
5870
# Stop a VM.
#
# Note: use $nocheck to skip tests if VM configuration file exists.
# We need that when migrating VMs to other nodes (files already moved)
# Note: we set $keepActive in vzdump stop mode - volumes need to stay active
#
# With $migratedfrom the process is sent SIGTERM directly and cleaned up using
# the config loaded for that node, without taking the config lock.
sub vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive, $migratedfrom) = @_;

    # a plain stop is forceful unless the caller says otherwise
    $force //= 1 if !$shutdown;

    if (!$migratedfrom) {
        my $stop_locked = sub {
            _do_vm_stop($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive);
        };
        return PVE::QemuConfig->lock_config($vmid, $stop_locked);
    }

    my $pid = check_running($vmid, $nocheck, $migratedfrom);
    kill 15, $pid if $pid;

    my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);
    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 0);

    return;
}
5891
# Reboot a running VM: create a reboot request and shut the guest down while
# holding the config lock. Does nothing if the VM is not running. On error the
# reboot request is cleared again and the error is re-raised.
sub vm_reboot {
    my ($vmid, $timeout) = @_;

    my $reboot_locked = sub {
        eval {
            # only reboot if running, as qmeventd starts it again on a stop event
            if (check_running($vmid)) {
                create_reboot_request($vmid);

                my $storecfg = PVE::Storage::config();
                _do_vm_stop($storecfg, $vmid, undef, undef, $timeout, 1);
            }
        };
        if (my $err = $@) {
            # avoid that the next normal shutdown will be confused for a reboot
            clear_reboot_request($vmid);
            die $err;
        }
    };

    PVE::QemuConfig->lock_config($vmid, $reboot_locked);
}
5914
# Suspend a VM.
#
# Without $includestate the guest is only paused via the monitor 'stop'
# command. With $includestate the VM state is written to a freshly allocated
# state volume, QEMU is told to quit, and the config lock goes from
# 'suspending' to 'suspended'.
#
# note: if using the statestorage parameter, the caller has to check privileges
sub vm_suspend {
    my ($vmid, $skiplock, $includestate, $statestorage) = @_;

    my ($conf, $path, $storecfg, $vmstate);

    PVE::QemuConfig->lock_config($vmid, sub {
        $conf = PVE::QemuConfig->load_config($vmid);

        my $is_backing_up = PVE::QemuConfig->has_lock($conf, 'backup');
        if (!$skiplock && !$is_backing_up) {
            PVE::QemuConfig->check_lock($conf);
        }

        if ($is_backing_up && $includestate) {
            die "cannot suspend to disk during backup\n";
        }

        if ($includestate) {
            $conf->{lock} = 'suspending';
            my $date = strftime("%Y-%m-%d", localtime(time()));
            $storecfg = PVE::Storage::config();

            unless ($statestorage) {
                $statestorage = find_vmstate_storage($conf, $storecfg);

                # check permissions for the storage
                my $rpcenv = PVE::RPCEnvironment::get();
                if ($rpcenv->{type} ne 'cli') {
                    my $authuser = $rpcenv->get_user();
                    $rpcenv->check($authuser, "/storage/$statestorage", ['Datastore.AllocateSpace']);
                }
            }

            $vmstate = PVE::QemuConfig->__snapshot_save_vmstate(
                $vmid, $conf, "suspend-$date", $storecfg, $statestorage, 1);
            $path = PVE::Storage::path($storecfg, $vmstate);
            PVE::QemuConfig->write_config($vmid, $conf);
        } else {
            mon_cmd($vmid, "stop");
        }
    });

    if ($includestate) {
        # save vm state
        PVE::Storage::activate_volumes($storecfg, [$vmstate]);

        eval {
            set_migration_caps($vmid, 1);
            mon_cmd($vmid, "savevm-start", statefile => $path);

            # poll until the state save either completes or fails
            while (1) {
                my $state = mon_cmd($vmid, "query-savevm");
                my $status = $state->{status};

                die "savevm not active\n" if !$status;

                if ($status eq 'active') {
                    sleep(1);
                    next;
                }
                if ($status eq 'completed') {
                    print "State saved, quitting\n";
                    last;
                }
                if ($status eq 'failed' && $state->{error}) {
                    die "query-savevm failed with error '$state->{error}'\n";
                }
                die "query-savevm returned status '$state->{status}'\n";
            }
        };
        my $err = $@;

        PVE::QemuConfig->lock_config($vmid, sub {
            $conf = PVE::QemuConfig->load_config($vmid);

            if ($err) {
                # cleanup, but leave suspending lock, to indicate something went wrong
                eval {
                    mon_cmd($vmid, "savevm-end");
                    PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
                    PVE::Storage::vdisk_free($storecfg, $vmstate);
                    delete @{$conf}{qw(vmstate runningmachine runningcpu)};
                    PVE::QemuConfig->write_config($vmid, $conf);
                };
                warn $@ if $@;
                die $err;
            }

            if (!PVE::QemuConfig->has_lock($conf, 'suspending')) {
                die "lock changed unexpectedly\n";
            }

            mon_cmd($vmid, "quit");
            $conf->{lock} = 'suspended';
            PVE::QemuConfig->write_config($vmid, $conf);
        });
    }
}
6009
# Resume a VM through the monitor.
#
# The monitor command depends on the status reported by 'query-status':
# nothing is done for 'running', 'system_wakeup' is sent for 'suspended', and
# 'shutdown' triggers a 'system_reset' before the default 'cont'.
# Unless $nocheck is set, the config lock is verified first ($skiplock or a
# 'backup' lock bypass that check).
sub vm_resume {
    my ($vmid, $skiplock, $nocheck) = @_;

    PVE::QemuConfig->lock_config($vmid, sub {
        my $res = mon_cmd($vmid, 'query-status');
        my $status = $res->{status};

        my $resume_cmd = 'cont';
        my $reset = 0;

        if ($status) {
            return if $status eq 'running'; # job done, go home

            if ($status eq 'suspended') {
                $resume_cmd = 'system_wakeup';
            } elsif ($status eq 'shutdown') {
                $reset = 1;
            }
        }

        unless ($nocheck) {
            my $conf = PVE::QemuConfig->load_config($vmid);

            my $lock_ignored = $skiplock || PVE::QemuConfig->has_lock($conf, 'backup');
            PVE::QemuConfig->check_lock($conf) if !$lock_ignored;
        }

        # required if a VM shuts down during a backup and we get a resume
        # request before the backup finishes for example
        mon_cmd($vmid, "system_reset") if $reset;

        mon_cmd($vmid, $resume_cmd);
    });
}
6040
# Send a key (or key combination) to the guest while holding the config lock.
#
# Dies with the monitor output if the 'sendkey' command prints anything.
# NOTE(review): $skiplock is accepted but not consulted here.
sub vm_sendkey {
    my ($vmid, $skiplock, $key) = @_;

    my $send_locked = sub {
        # the loaded config is not used further in this block
        my $conf = PVE::QemuConfig->load_config($vmid);

        # there is no qmp command, so we use the human monitor command
        my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "sendkey $key");
        if ($output ne '') {
            die $output;
        }
    };

    PVE::QemuConfig->lock_config($vmid, $send_locked);
}
6053
6054 # vzdump restore implementation
6055
# Return the name of the first member listed by 'tar tf' for $archive.
#
# Dies if $archive is not a regular file, if the tar listing cannot be
# started, or if the listing produces no output.
sub tar_archive_read_firstfile {
    my ($archive) = @_;

    unless (-f $archive) {
        die "ERROR: file '$archive' does not exist\n";
    }

    # try to detect archive type first
    my $pid = open(my $listing, '-|', 'tar', 'tf', $archive);
    die "unable to open file '$archive'\n" if !$pid;

    # only the first line is needed, so stop tar right after reading it
    my $firstfile = readline($listing);
    kill 15, $pid;
    close $listing;

    if (!$firstfile) {
        die "ERROR: archive contaions no data\n";
    }
    chomp $firstfile;

    return $firstfile;
}
6073
# Remove the temporary volumes recorded in a restore stat file.
#
# $storecfg: storage configuration, passed on to PVE::Storage::vdisk_free()
# $statfile: file with lines ending in 'vzdump:<name>:<volid>'
#
# Volume IDs starting with '/' are treated as plain files and unlinked, all
# others are freed through the storage layer. This is a best-effort cleanup:
# failures are reported on STDERR and processing continues with the next line.
# A stat file that cannot be opened is silently ignored.
sub tar_restore_cleanup {
    my ($storecfg, $statfile) = @_;

    print STDERR "starting cleanup\n";

    if (my $fd = IO::File->new($statfile, "r")) {
        while (defined(my $line = <$fd>)) {
            if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                my $volid = $2;
                eval {
                    if ($volid =~ m|^/|) {
                        # parenthesize the operand: 'unlink $volid || die ...' applies
                        # '||' to $volid, so a failed unlink would go unnoticed
                        unlink($volid) || die "unlink failed - $!\n";
                    } else {
                        PVE::Storage::vdisk_free($storecfg, $volid);
                    }
                    print STDERR "temporary volume '$volid' sucessfuly removed\n";
                };
                print STDERR "unable to cleanup '$volid' - $@" if $@;
            } else {
                print STDERR "unable to parse line in statfile - $line";
            }
        }
        $fd->close();
    }
}
6099
# Dispatch a restore to the tar or vma implementation.
#
# An $archive of '-' (stdin) always goes to restore_vma_archive(). Otherwise
# the format comes from $opts->{format}, falling back to what
# PVE::Storage::archive_info() reports; 'tar' selects restore_tar_archive(),
# anything else restore_vma_archive() together with the detected compression.
sub restore_file_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if ($archive eq '-') {
        return restore_vma_archive($archive, $vmid, $user, $opts);
    }

    my $info = PVE::Storage::archive_info($archive);
    my $format = defined($opts->{format}) ? $opts->{format} : $info->{format};
    my $comp = $info->{compression};

    # try to detect archive format
    return restore_tar_archive($archive, $vmid, $user, $opts) if $format eq 'tar';

    return restore_vma_archive($archive, $vmid, $user, $opts, $comp);
}
6117
# Helper to remove disks that will not be used after restore.
#
# $storecfg:     Storage configuration
# $vmid:         ID of the VM that is being restored
# $oldconf:      the configuration that is about to be replaced
# $virtdev_hash: drives contained in the backup, keyed by config key
#
# Errors while freeing a volume are only warned about.
my $restore_cleanup_oldconf = sub {
    my ($storecfg, $vmid, $oldconf, $virtdev_hash) = @_;

    # free a volume, downgrading any failure to a warning
    my $free_volume = sub {
        my ($volid) = @_;
        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        if (my $err = $@) {
            warn $err;
        }
    };

    PVE::QemuConfig->foreach_volume($oldconf, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid;
        return if $volid =~ m|^/|; # absolute paths are left alone

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return unless $path && $owner && $owner == $vmid;

        # Note: only delete disk we want to restore
        # other volumes will become unused
        $free_volume->($volid) if $virtdev_hash->{$ds};
    });

    # delete vmstate files, after the restore we have no snapshots anymore
    for my $snapname (keys %{$oldconf->{snapshots}}) {
        my $snap = $oldconf->{snapshots}->{$snapname};
        next unless $snap->{vmstate};
        $free_volume->($snap->{vmstate});
    }
};
6154
# Helper to parse vzdump backup device hints
#
# $rpcenv: Environment, used to check storage permissions
# $user: User ID, to check storage permissions
# $storecfg: Storage configuration
# $fh: the file handle for reading the configuration
# $devinfo: should contain device sizes for all backed-up devices
# $options: backup options (pool, default storage)
#
# Return: $virtdev_hash, updates $devinfo (add devname, virtdev, format, storeid)
my $parse_backup_hints = sub {
    my ($rpcenv, $user, $storecfg, $fh, $devinfo, $options) = @_;

    my $virtdev_hash = {};

    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\#qmdump\#map:(\S+):(\S+):(\S*):(\S*):$/) {
            my ($virtdev, $devname, $storeid, $format) = ($1, $2, $3, $4);
            die "archive does not contain data for drive '$virtdev'\n"
                if !$devinfo->{$devname};

            # an explicitly requested storage overrides the hint, 'local' is the fallback
            $storeid = $options->{storage} if defined($options->{storage});
            $storeid ||= 'local';
            $format ||= 'raw';

            my $dev = $devinfo->{$devname};
            $dev->{devname} = $devname;
            $dev->{virtdev} = $virtdev;
            $dev->{format} = $format;
            $dev->{storeid} = $storeid;

            # check permission on storage
            my $pool = $options->{pool}; # todo: do we need that?
            $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace'])
                if $user ne 'root@pam';

            $virtdev_hash->{$virtdev} = $devinfo->{$devname};
            next;
        }

        next if $line !~ m/^((?:ide|sata|scsi)\d+):\s*(.*)\s*$/;

        # plain drive lines only matter for cloudinit drives, which carry no
        # '#qmdump#map' hint and get an entry with a fixed size instead
        my $virtdev = $1;
        my $drive = parse_drive($virtdev, $2);
        next if !drive_is_cloudinit($drive);

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        $storeid = $options->{storage} if defined ($options->{storage});
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        my $format = qemu_img_format($scfg, $volname); # has 'raw' fallback

        $virtdev_hash->{$virtdev} = {
            format => $format,
            storeid => $storeid,
            size => PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE,
            is_cloudinit => 1,
        };
    }

    return $virtdev_hash;
};
6215
# Helper to allocate and activate all volumes required for a restore
#
# $storecfg: Storage configuration
# $virtdev_hash: as returned by parse_backup_hints()
# $vmid: ID of the VM the volumes are allocated for
#
# Each entry of $virtdev_hash is updated in place: 'format' falls back to the
# storage default when the requested one is not supported, and 'volid' is set
# to the newly allocated volume.
#
# Returns: { $virtdev => $volid }
my $restore_allocate_devices = sub {
    my ($storecfg, $virtdev_hash, $vmid) = @_;

    my $map = {};
    foreach my $virtdev (sort keys %$virtdev_hash) {
        my $d = $virtdev_hash->{$virtdev};
        # size is converted to 1024-byte units, rounding up
        my $alloc_size = int(($d->{size} + 1024 - 1)/1024);
        my $storeid = $d->{storeid};
        # looked up once per device and reused for the cloudinit naming below
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        # test if requested format is supported
        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        my $supported = grep { $_ eq $d->{format} } @$validFormats;
        $d->{format} = $defFormat if !$supported;

        my $name;
        if ($d->{is_cloudinit}) {
            $name = "vm-$vmid-cloudinit";
            # storages with a 'path' get the format appended as file extension
            if ($scfg->{path}) {
                $name .= ".$d->{format}";
            }
        }

        my $volid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $vmid, $d->{format}, $name, $alloc_size);

        print STDERR "new volume ID is '$volid'\n";
        $d->{volid} = $volid;

        PVE::Storage::activate_volumes($storecfg, [$volid]);

        $map->{$virtdev} = $volid;
    }

    return $map;
};
6259
# Translate one line of a backed-up VM config into the text that goes into the
# restored config.
#
# $cookie: state shared between calls; 'netcount' numbers converted net devices
# $map:    { $virtdev => $volid } of restored volumes
# $line:   the config line, including its trailing newline
# $unique: if set, MAC addresses and the smbios1 UUID are regenerated
#
# Returns the replacement text, which is '' for lines that are dropped.
sub restore_update_config_line {
    my ($cookie, $map, $line, $unique) = @_;

    # backup hints, locks, unused volumes and snapshot parents are not restored
    for my $dropped (
        qr/^\#qmdump\#/,
        qr/^\#vzdump\#/,
        qr/^lock:/,
        qr/^unused\d+:/,
        qr/^parent:/,
    ) {
        return '' if $line =~ $dropped;
    }

    my $out = '';

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
    if ($line =~ m/^(vlan(\d+)):\s*(\S+)\s*$/) {
        # try to convert old 1.X settings
        my ($id, $ind, $ethcfg) = ($1, $2, $3);
        for my $devconfig (PVE::Tools::split_list($ethcfg)) {
            my ($model, $macaddr) = split(/\=/, $devconfig);
            if (!$macaddr || $unique) {
                $macaddr = PVE::Tools::random_ether_addr($dc->{mac_prefix});
            }
            my $netstr = print_net({
                model => $model,
                bridge => "vmbr$ind",
                macaddr => $macaddr,
            });

            $out .= "net$cookie->{netcount}: $netstr\n";
            $cookie->{netcount}++;
        }
    } elsif (($line =~ m/^(net\d+):\s*(\S+)\s*$/) && $unique) {
        my ($id, $netstr) = ($1, $2);
        my $net = parse_net($netstr);
        if ($net->{macaddr}) {
            $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix});
        }
        $netstr = print_net($net);
        $out .= "$id: $netstr\n";
    } elsif ($line =~ m/^((ide|scsi|virtio|sata|efidisk|tpmstate)\d+):\s*(\S+)\s*$/) {
        my ($virtdev, $value) = ($1, $3);
        my $di = parse_drive($virtdev, $value);
        if (defined($di->{backup}) && !$di->{backup}) {
            # drives excluded from backup are kept as a comment only
            $out .= "#$line";
        } elsif ($map->{$virtdev}) {
            delete $di->{format}; # format can change on restore
            $di->{file} = $map->{$virtdev};
            $value = print_drive($di);
            $out .= "$virtdev: $value\n";
        } else {
            $out .= $line;
        }
    } elsif ($line =~ m/^vmgenid: (.*)/) {
        my $vmgenid = $1;
        # always generate a new vmgenid if there was a valid one setup
        $vmgenid = generate_uuid() if $vmgenid ne '0';
        $out .= "vmgenid: $vmgenid\n";
    } elsif (($line =~ m/^(smbios1: )(.*)/) && $unique) {
        my ($key_prefix, $smbios_str) = ($1, $2);
        my ($uuid, $uuid_str);
        UUID::generate($uuid);
        UUID::unparse($uuid, $uuid_str);
        my $smbios1 = parse_smbios1($smbios_str);
        $smbios1->{uuid} = $uuid_str;
        $out .= $key_prefix.print_smbios1($smbios1)."\n";
    } else {
        $out .= $line;
    }

    return $out;
}
6328
# Deactivate every volume recorded (as 'volid') in $devinfo.
#
# $storecfg: Storage configuration
# $devinfo:  { $devname => { volid => ..., ... } }; entries without a volid are skipped
my $restore_deactivate_volumes = sub {
    my ($storecfg, $devinfo) = @_;

    my @volids = grep { $_ } map { $devinfo->{$_}->{volid} } keys %$devinfo;

    PVE::Storage::deactivate_volumes($storecfg, \@volids);
};
6340
# Remove every volume recorded (as 'volid') in $devinfo.
#
# $storecfg: Storage configuration, passed on to PVE::Storage::vdisk_free()
# $devinfo:  { $devname => { volid => ..., ... } }; entries without a volid are skipped
#
# Volume IDs starting with '/' are unlinked as plain files, all others are
# freed through the storage layer. This is a best-effort cleanup: failures are
# reported on STDERR and the remaining volumes are still processed.
my $restore_destroy_volumes = sub {
    my ($storecfg, $devinfo) = @_;

    foreach my $devname (keys %$devinfo) {
        my $volid = $devinfo->{$devname}->{volid};
        next if !$volid;
        eval {
            if ($volid =~ m|^/|) {
                # parenthesize the operand: 'unlink $volid || die ...' applies
                # '||' to $volid, so a failed unlink would go unnoticed
                unlink($volid) || die "unlink failed - $!\n";
            } else {
                PVE::Storage::vdisk_free($storecfg, $volid);
            }
            print STDERR "temporary volume '$volid' sucessfuly removed\n";
        };
        print STDERR "unable to cleanup '$volid' - $@" if $@;
    }
};
6358
# List the 'images' volumes known to the storage layer, keyed by volume ID.
#
# $cfg:  Storage configuration
# $vmid: passed to PVE::Storage::vdisk_list() as VM filter (may be undef)
#
# Entries lacking a volid or a size are skipped; every returned entry gets an
# additional 'path' field.
sub scan_volids {
    my ($cfg, $vmid) = @_;

    my $info = PVE::Storage::vdisk_list($cfg, undef, $vmid, undef, 'images');

    my $volid_hash = {};
    for my $storeid (keys %$info) {
        for my $item (@{$info->{$storeid}}) {
            my $volid = $item->{volid};
            next unless $volid && $item->{size};

            $item->{path} = PVE::Storage::path($cfg, $volid);
            $volid_hash->{$volid} = $item;
        }
    }

    return $volid_hash;
}
6375
# Bring the disk entries of $conf in line with the volumes that actually exist.
#
# $vmid:       ID of the VM, used for messages and to skip its state volumes
# $conf:       VM configuration, modified in place
# $volid_hash: { $volid => { path => ..., size => ..., ... } } of existing volumes
#
# Three passes: refresh the size of configured drives, drop 'unusedX' entries
# whose volume is referenced elsewhere, and add unreferenced volumes as unused.
#
# Returns a true value if $conf was changed, undef otherwise.
sub update_disk_config {
    my ($vmid, $conf, $volid_hash) = @_;

    my $changes;
    my $prefix = "VM $vmid";

    # Note: it is allowed to define multiple storages with same path (alias), so
    # we need to check both 'volid' and real 'path' (two different volid can point
    # to the same path).
    my %seen_volid; # used and unused disks
    my %seen_path;

    # update size info
    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;
        my $volume = $volid_hash->{$volid};

        # mark volid as "in-use" for next step
        $seen_volid{$volid} = 1;
        if ($volume) {
            my $path = $volume->{path};
            $seen_path{$path} = 1 if $path;
        }

        return if drive_is_cdrom($drive) || !$volume;

        my ($updated, $msg) = PVE::QemuServer::Drive::update_disksize($drive, $volume->{size});
        if (defined($updated)) {
            $changes = 1;
            $conf->{$opt} = print_drive($updated);
            print "$prefix ($opt): $msg\n";
        }
    });

    # remove 'unusedX' entry if volume is used
    PVE::QemuConfig->foreach_unused_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $path;
        $path = $volid_hash->{$volid}->{path} if $volid_hash->{$volid};

        my $in_use = $seen_volid{$volid} || ($path && $seen_path{$path});
        if ($in_use) {
            print "$prefix remove entry '$opt', its volume '$volid' is in use\n";
            $changes = 1;
            delete $conf->{$opt};
        }

        $seen_volid{$volid} = 1;
        $seen_path{$path} = 1 if $path;
    });

    for my $volid (sort keys %$volid_hash) {
        next if $volid =~ m/vm-$vmid-state-/;
        next if $seen_volid{$volid};

        my $path = $volid_hash->{$volid}->{path};
        next if !$path; # just to be sure
        next if $seen_path{$path};

        $changes = 1;
        my $key = PVE::QemuConfig->add_unused_volume($conf, $volid);
        print "$prefix add unreferenced volume '$volid' as '$key' to config\n";
        $seen_path{$path} = 1; # avoid to add more than once (aliases)
    }

    return $changes;
}
6449
# Rescan storages and update the disk entries of one VM or of all VMs.
#
# $vmid:   VM to update; if undef, every VM from config_list() is processed
# $nolock: if set, the config is updated without taking the config lock
# $dryrun: if set, changes are reported but the config is not written
sub rescan {
    my ($vmid, $nolock, $dryrun) = @_;

    my $cfg = PVE::Storage::config();

    print "rescan volumes...\n";
    my $volid_hash = scan_volids($cfg, $vmid);

    my $updatefn = sub {
        my ($vmid) = @_;

        my $conf = PVE::QemuConfig->load_config($vmid);

        PVE::QemuConfig->check_lock($conf);

        # only consider volumes that belong to this VM
        my %vm_volids =
            map { $_ => $volid_hash->{$_} }
            grep {
                my $info = $volid_hash->{$_};
                $info->{vmid} && $info->{vmid} == $vmid;
            } keys %$volid_hash;

        my $changes = update_disk_config($vmid, $conf, \%vm_volids);

        PVE::QemuConfig->write_config($vmid, $conf) if $changes && !$dryrun;
    };

    # run the update for one VM, with or without the config lock
    my $run_update = sub {
        my ($id) = @_;
        return $updatefn->($id) if $nolock;
        return PVE::QemuConfig->lock_config($id, $updatefn, $id);
    };

    if (defined($vmid)) {
        $run_update->($vmid);
    } else {
        my $vmlist = config_list();
        for my $id (keys %$vmlist) {
            $run_update->($id);
        }
    }
}
6493
# Restore a VM from a Proxmox Backup Server snapshot.
#
# $archive: volume ID of the backup (must parse as vtype 'backup', format 'pbs-vm')
# $vmid:    ID of the VM to create/overwrite
# $user:    user used for the storage permission checks
# $options: restore options; 'storage', 'unique', 'pool' and 'live' are read here
#
# The snapshot's index.json, qemu-server.conf and (if present) fw.conf are
# fetched into a temporary directory, target volumes are allocated and filled
# with 'pbs-restore', and the rewritten config is stored. With $options->{live}
# only efidisk0/tpmstate0 are restored up front; the remaining disks are
# handed to pbs_live_restore() while the 'create' lock is kept.
#
# On error the allocated volumes are destroyed and the error is re-thrown.
sub restore_proxmox_backup_archive {
    my ($archive, $vmid, $user, $options) = @_;

    my $storecfg = PVE::Storage::config();

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($archive);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

    my $fingerprint = $scfg->{fingerprint};
    my $keyfile = PVE::Storage::PBSPlugin::pbs_encryption_key_file_name($storecfg, $storeid);

    my $repo = PVE::PBSClient::get_repository($scfg);

    # This is only used for `pbs-restore` and the QEMU PBS driver (live-restore)
    my $password = PVE::Storage::PBSPlugin::pbs_get_password($scfg, $storeid);
    local $ENV{PBS_PASSWORD} = $password;
    local $ENV{PBS_FINGERPRINT} = $fingerprint if defined($fingerprint);

    my ($vtype, $pbs_backup_name, undef, undef, undef, undef, $format) =
        PVE::Storage::parse_volname($storecfg, $archive);

    die "got unexpected vtype '$vtype'\n" if $vtype ne 'backup';

    die "got unexpected backup format '$format'\n" if $format ne 'pbs-vm';

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;
    mkpath $tmpdir;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    # Note: $oldconf is undef if VM does not exists
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my $rpcenv = PVE::RPCEnvironment::get();
    my $devinfo = {};

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        my $cfgfn = "$tmpdir/qemu-server.conf";
        my $firewall_config_fn = "$tmpdir/fw.conf";
        my $index_fn = "$tmpdir/index.json";

        my $cmd = "restore";

        my $param = [$pbs_backup_name, "index.json", $index_fn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);
        my $index = PVE::Tools::file_get_contents($index_fn);
        $index = decode_json($index);

        # collect the size of every drive image listed in the index
        foreach my $info (@{$index->{files}}) {
            # dots are escaped so that only real '.img.fidx' names match
            if ($info->{filename} =~ m/^(drive-\S+)\.img\.fidx$/) {
                my $devname = $1;
                if ($info->{size} =~ m/^(\d+)$/) { # untaint size
                    $devinfo->{$devname}->{size} = $1;
                } else {
                    die "unable to parse file size in 'index.json' - got '$info->{size}'\n";
                }
            }
        }

        my $is_qemu_server_backup = scalar(
            grep { $_->{filename} eq 'qemu-server.conf.blob' } @{$index->{files}}
        );
        if (!$is_qemu_server_backup) {
            die "backup does not look like a qemu-server backup (missing 'qemu-server.conf' file)\n";
        }
        my $has_firewall_config = scalar(grep { $_->{filename} eq 'fw.conf.blob' } @{$index->{files}});

        $param = [$pbs_backup_name, "qemu-server.conf", $cfgfn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

        if ($has_firewall_config) {
            $param = [$pbs_backup_name, "fw.conf", $firewall_config_fn];
            PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($firewall_config_fn, "${pve_firewall_dir}/$vmid.fw");
        }

        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        my $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $storecfg, $fh, $devinfo, $options);

        # fixme: rate limit?

        # create empty/temp config
        PVE::Tools::file_set_contents($conffile, "memory: 128\nlock: create");

        $restore_cleanup_oldconf->($storecfg, $vmid, $oldconf, $virtdev_hash) if $oldconf;

        # allocate volumes
        my $map = $restore_allocate_devices->($storecfg, $virtdev_hash, $vmid);

        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            # this fails if storage is unavailable
            my $volid = $d->{volid};
            my $path = PVE::Storage::path($storecfg, $volid);

            # for live-restore we only want to preload the efidisk and TPM state
            next if $options->{live} && $virtdev ne 'efidisk0' && $virtdev ne 'tpmstate0';

            my $pbs_restore_cmd = [
                '/usr/bin/pbs-restore',
                '--repository', $repo,
                $pbs_backup_name,
                "$d->{devname}.img.fidx",
                $path,
                '--verbose',
            ];

            push @$pbs_restore_cmd, '--format', $d->{format} if $d->{format};
            push @$pbs_restore_cmd, '--keyfile', $keyfile if -e $keyfile;

            if (PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $volid)) {
                push @$pbs_restore_cmd, '--skip-zero';
            }

            my $dbg_cmdstring = PVE::Tools::cmd2string($pbs_restore_cmd);
            print "restore proxmox backup image: $dbg_cmdstring\n";
            run_command($pbs_restore_cmd);
        }

        # second pass over the config: build the new config text
        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $options->{unique},
            );
        }

        $fh->close();
    };
    my $err = $@;

    # a live-restore keeps its volumes active unless the preparation failed
    if ($err || !$options->{live}) {
        $restore_deactivate_volumes->($storecfg, $devinfo);
    }

    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($storecfg, $devinfo);
        die $err;
    }

    if ($options->{live}) {
        # keep lock during live-restore
        $new_conf_raw .= "\nlock: create";
    }

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $options->{pool}) if $options->{pool};

    if ($options->{live}) {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "got signal ($!) - abort\n"; };

        my $conf = PVE::QemuConfig->load_config($vmid);
        die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf);

        # these special drives are already restored before start
        delete $devinfo->{'drive-efidisk0'};
        delete $devinfo->{'drive-tpmstate0-backup'};
        pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $repo, $keyfile, $pbs_backup_name);

        PVE::QemuConfig->remove_lock($vmid, "create");
    }
}
6696
# Start a VM on top of a PBS snapshot and stream the disk contents in while it runs.
#
# $vmid:           ID of the VM
# $conf:           VM configuration
# $storecfg:       Storage configuration
# $restored_disks: hash keyed by device name ('drive-<config key>') of the disks to stream
# $repo:           PBS repository string
# $keyfile:        path of the encryption key file, only used if it exists
# $snap:           name of the PBS snapshot
#
# The VM is started paused with a backing chain pointing to the snapshot, one
# 'block-stream' job per disk is created, and the guest is then continued.
# If anything in that sequence fails, the VM is stopped and the function dies
# with "live-restore failed".
sub pbs_live_restore {
    my ($vmid, $conf, $storecfg, $restored_disks, $repo, $keyfile, $snap) = @_;

    print "starting VM for live-restore\n";
    print "repository: '$repo', snapshot: '$snap'\n";

    my $pbs_backing = {};
    for my $ds (keys %$restored_disks) {
        # check the match instead of reading $1 blindly: after a failed match
        # $1 would still hold the capture of some earlier, unrelated match
        my ($confname) = $ds =~ m/^drive-(.*)$/
            or die "unexpected device name '$ds' for live-restore\n";

        $pbs_backing->{$confname} = {
            repository => $repo,
            snapshot => $snap,
            archive => "$ds.img.fidx",
        };
        $pbs_backing->{$confname}->{keyfile} = $keyfile if -e $keyfile;

        my $drive = parse_drive($confname, $conf->{$confname});
        print "restoring '$ds' to '$drive->{file}'\n";
    }

    my $drives_streamed = 0;
    eval {
        # make sure HA doesn't interrupt our restore by stopping the VM
        if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
            run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
        }

        # start VM with backing chain pointing to PBS backup, environment vars for PBS driver
        # in QEMU (PBS_PASSWORD and PBS_FINGERPRINT) are already set by our caller
        vm_start_nolock($storecfg, $vmid, $conf, {paused => 1, 'pbs-backing' => $pbs_backing}, {});

        my $qmeventd_fd = register_qmeventd_handle($vmid);

        # begin streaming, i.e. data copy from PBS to target disk for every vol,
        # this will effectively collapse the backing image chain consisting of
        # [target <- alloc-track -> PBS snapshot] to just [target] (alloc-track
        # removes itself once all backing images vanish with 'auto-remove=on')
        my $jobs = {};
        for my $ds (sort keys %$restored_disks) {
            my $job_id = "restore-$ds";
            mon_cmd($vmid, 'block-stream',
                'job-id' => $job_id,
                device => "$ds",
            );
            $jobs->{$job_id} = {};
        }

        mon_cmd($vmid, 'cont');
        qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');

        print "restore-drive jobs finished successfully, removing all tracking block devices"
            ." to disconnect from Proxmox Backup Server\n";

        for my $ds (sort keys %$restored_disks) {
            mon_cmd($vmid, 'blockdev-del', 'node-name' => "$ds-pbs");
        }

        close($qmeventd_fd);
    };

    my $err = $@;

    if ($err) {
        warn "An error occured during live-restore: $err\n";
        _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
        die "live-restore failed\n";
    }
}
6766
# Restore a VM from a vma archive (or from stdin if $archive is '-').
#
# $archive: path of the archive, or '-' to read from STDIN
# $vmid:    ID of the VM to create/overwrite
# $user:    user used for the storage permission checks
# $opts:    restore options; 'bwlimit', 'storage', 'unique' and 'pool' are read here
# $comp:    compression of the archive, if any
#
# A pipeline of optional rate limiter, optional decompressor and 'vma extract'
# is run. Once 'vma extract' has printed the device list and the CTIME line,
# the target volumes are allocated and the device map is written to a FIFO
# that 'vma extract' reads. On error all allocated volumes are destroyed and
# the error is re-thrown.
sub restore_vma_archive {
    my ($archive, $vmid, $user, $opts, $comp) = @_;

    my $readfrom = $archive;

    my $cfg = PVE::Storage::config();
    my $commands = [];
    my $bwlimit = $opts->{bwlimit};

    my $dbg_cmdstring = '';
    # append a command to the pipeline; everything after it reads from stdin
    my $add_pipe = sub {
        my ($cmd) = @_;
        push @$commands, $cmd;
        if (length($dbg_cmdstring)) {
            $dbg_cmdstring .= ' | ';
        }
        $dbg_cmdstring .= PVE::Tools::cmd2string($cmd);
        $readfrom = '-';
    };

    my $input = undef;
    if ($archive eq '-') {
        $input = '<&STDIN';
    } else {
        # If we use a backup from a PVE defined storage we also consider that
        # storage's rate limit:
        my (undef, $volid) = PVE::Storage::path_to_volume_id($cfg, $archive);
        if (defined($volid)) {
            my ($sid, undef) = PVE::Storage::parse_volume_id($volid);
            my $readlimit = PVE::Storage::get_bandwidth_limit('restore', [$sid], $bwlimit);
            if ($readlimit) {
                print STDERR "applying read rate limit: $readlimit\n";
                my $cstream = ['cstream', '-t', $readlimit*1024, '--', $readfrom];
                $add_pipe->($cstream);
            }
        }
    }

    if ($comp) {
        my $info = PVE::Storage::decompressor_info('vma', $comp);
        my $cmd = $info->{decompressor};
        push @$cmd, $readfrom;
        $add_pipe->($cmd);
    }

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { warn "got interrupt - ignored\n"; };

    my $mapfifo = "/var/tmp/vzdumptmp$$.fifo";
    POSIX::mkfifo($mapfifo, 0600);
    my $fifofh;
    my $openfifo = sub { open($fifofh, '>', $mapfifo) or die $! };

    $add_pipe->(['vma', 'extract', '-v', '-r', $mapfifo, $readfrom, $tmpdir]);

    my $oldtimeout;
    my $timeout = 5;

    my $devinfo = {};

    my $rpcenv = PVE::RPCEnvironment::get();

    my $conffile = PVE::QemuConfig->config_file($vmid);

    # Note: $oldconf is undef if VM does not exist
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my %storage_limits;

    # allocate the target volumes, write the device map to the FIFO and build
    # the new config text
    my $print_devmap = sub {
        my $cfgfn = "$tmpdir/qemu-server.conf";

        # we can read the config - that is already extracted
        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        my $fwcfgfn = "$tmpdir/qemu-server.fw";
        if (-f $fwcfgfn) {
            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($fwcfgfn, "${pve_firewall_dir}/$vmid.fw");
        }

        my $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $cfg, $fh, $devinfo, $opts);

        # determine the restore rate limit once per target storage
        for my $info (values %{$virtdev_hash}) {
            my $storeid = $info->{storeid};
            next if defined($storage_limits{$storeid});

            my $limit = PVE::Storage::get_bandwidth_limit('restore', [$storeid], $bwlimit) // 0;
            if ($limit) {
                print STDERR "rate limit for storage $storeid: $limit KiB/s\n";
            }
            $storage_limits{$storeid} = $limit * 1024;
        }

        for my $devname (keys %$devinfo) {
            die "found no device mapping information for device '$devname'\n"
                if !$devinfo->{$devname}->{virtdev};
        }

        # create empty/temp config
        if ($oldconf) {
            PVE::Tools::file_set_contents($conffile, "memory: 128\n");
            $restore_cleanup_oldconf->($cfg, $vmid, $oldconf, $virtdev_hash);
        }

        # allocate volumes
        my $map = $restore_allocate_devices->($cfg, $virtdev_hash, $vmid);

        # print restore information to $fifofh
        for my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            my $storeid = $d->{storeid};
            my $volid = $d->{volid};

            my $map_opts = '';
            if (my $limit = $storage_limits{$storeid}) {
                $map_opts .= "throttling.bps=$limit:throttling.group=$storeid:";
            }

            # zeros only need to be written if the volume is not sparse-initialized
            my $write_zeros = PVE::Storage::volume_has_feature($cfg, 'sparseinit', $volid) ? 0 : 1;

            my $path = PVE::Storage::path($cfg, $volid);

            print $fifofh "${map_opts}format=$d->{format}:${write_zeros}:$d->{devname}=$path\n";

            print "map '$d->{devname}' to '$path' (write zeros = ${write_zeros})\n";
        }

        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $fh->close();
    };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };
        local $SIG{ALRM} = sub { die "got timeout\n"; };

        $oldtimeout = alarm($timeout);

        # handles each output line of the pipeline
        my $parser = sub {
            my ($line) = @_;

            print "$line\n";

            if ($line =~ m/^DEV:\sdev_id=(\d+)\ssize:\s(\d+)\sdevname:\s(\S+)$/) {
                my ($dev_id, $size, $devname) = ($1, $2, $3);
                $devinfo->{$devname} = { size => $size, dev_id => $dev_id };
            } elsif ($line =~ m/^CTIME: /) {
                # we correctly received the vma config, so we can disable
                # the timeout now for disk allocation (set to 10 minutes, so
                # that we always timeout if something goes wrong)
                alarm(600);
                $print_devmap->();
                print $fifofh "done\n";
                my $previous_alarm = $oldtimeout || 0;
                $oldtimeout = undef;
                alarm($previous_alarm);
                close($fifofh);
                $fifofh = undef;
            }
        };

        print "restore vma archive: $dbg_cmdstring\n";
        run_command($commands, input => $input, outfunc => $parser, afterfork => $openfifo);
    };
    my $err = $@;

    alarm($oldtimeout) if $oldtimeout;

    $restore_deactivate_volumes->($cfg, $devinfo);

    close($fifofh) if $fifofh;
    unlink $mapfifo;
    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($cfg, $devinfo);
        die $err;
    }

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $opts->{pool}) if $opts->{pool};
}
6982
# Restore a legacy tar-based vzdump backup for VM $vmid.
#
# $archive is the path of the tar archive, or '-' to read it from STDIN.
# $user is exported to the extraction helper through VZDUMP_USER. $opts may
# carry 'storage', 'pool', 'prealloc' and 'info' (all forwarded to qmextract)
# and 'unique' (forwarded to restore_update_config_line).
#
# tar pipes every archive member to qmextract; afterwards the volume mapping
# is read back from 'qmrestore.stat' in the temporary directory and used to
# rewrite the archived config, which then becomes the new VM config.
#
# With $opts->{info} set, the archive is only passed through qmextract: the
# existing VM is neither destroyed nor is its config overwritten.
#
# Dies on any error, after cleaning up through tar_restore_cleanup() and
# removing the temporary directory.
sub restore_tar_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if ($archive ne '-') {
        my $firstfile = tar_archive_read_firstfile($archive);
        die "ERROR: file '$archive' does not look like a QemuServer vzdump backup\n"
            if $firstfile ne 'qemu-server.conf';
    }

    my $storecfg = PVE::Storage::config();

    # avoid zombie disks when restoring over an existing VM -> cleanup first
    # pass keep_empty_config=1 to keep the config (thus VMID) reserved for us
    # skiplock=1 because qmrestore has set the 'create' lock itself already
    # (nothing gets restored in info mode, so there is nothing to clean up then)
    my $vmcfgfn = PVE::QemuConfig->config_file($vmid);
    destroy_vm($storecfg, $vmid, 1, { lock => 'restore' })
        if -f $vmcfgfn && !$opts->{info};

    my $tocmd = "/usr/lib/qemu-server/qmextract";

    $tocmd .= " --storage " . PVE::Tools::shellquote($opts->{storage}) if $opts->{storage};
    $tocmd .= " --pool " . PVE::Tools::shellquote($opts->{pool}) if $opts->{pool};
    $tocmd .= ' --prealloc' if $opts->{prealloc};
    $tocmd .= ' --info' if $opts->{info};

    # tar option "xf" does not autodetect compression when read from STDIN,
    # so we pipe to zcat
    my $cmd = "zcat -f|tar xf " . PVE::Tools::shellquote($archive) . " " .
        PVE::Tools::shellquote("--to-command=$tocmd");

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    mkpath $tmpdir;

    local $ENV{VZDUMP_TMPDIR} = $tmpdir;
    local $ENV{VZDUMP_VMID} = $vmid;
    local $ENV{VZDUMP_USER} = $user;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    my $new_conf_raw = '';

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        if ($archive eq '-') {
            print "extracting archive from STDIN\n";
            run_command($cmd, input => "<&STDIN");
        } else {
            print "extracting archive '$archive'\n";
            run_command($cmd);
        }

        # NOTE: this only leaves the eval block, the sub itself returns below
        return if $opts->{info};

        # read new mapping
        my $map = {};
        my $statfile = "$tmpdir/qmrestore.stat";
        if (my $fd = IO::File->new($statfile, "r")) {
            while (defined(my $line = <$fd>)) {
                if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                    $map->{$1} = $2 if $1;
                } else {
                    chomp $line; # avoid an empty line after the message
                    print STDERR "unable to parse line in statfile - $line\n";
                }
            }
            $fd->close();
        }

        my $confsrc = "$tmpdir/qemu-server.conf";

        my $srcfd = IO::File->new($confsrc, "r") || die "unable to open file '$confsrc'\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$srcfd>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $srcfd->close();
    };
    if (my $err = $@) {
        tar_restore_cleanup($storecfg, "$tmpdir/qmrestore.stat") if !$opts->{info};
        # the stat file is not needed anymore, do not leave the directory behind
        rmtree $tmpdir;
        die $err;
    }

    rmtree $tmpdir;

    # info mode must not replace the config with the (empty) restore result
    return if $opts->{info};

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    return;
}
7090
# Call $func->($storeid) once for every storage that is referenced by a
# non-CD-ROM volume of $conf. Storage IDs are visited in sorted order; volumes
# whose ID does not yield a storage ID are ignored.
sub foreach_storage_used_by_vm {
    my ($conf, $func) = @_;

    my %seen;

    my $collect = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive);

        my ($sid) = PVE::Storage::parse_volume_id($drive->{file}, 1);
        $seen{$sid} = $sid if $sid;
    };
    PVE::QemuConfig->foreach_volume($conf, $collect);

    for my $sid (sort keys %seen) {
        $func->($sid);
    }
}
7110
# Storage types that qualify in do_snapshots_with_qemu() as long as the
# storage does not have 'krbd' set.
my $qemu_snap_storage = {
    rbd => 1,
};

# Returns 1 if $volid is located on a storage type listed in
# $qemu_snap_storage (without 'krbd'), or if the volume ID ends in '.qcow2' or
# '.qed'. Returns nothing otherwise, and always nothing for a device ID
# matching 'tpmstate0'. Dies if the storage of $volid is not configured.
sub do_snapshots_with_qemu {
    my ($storecfg, $volid, $deviceid) = @_;

    return if $deviceid =~ m/tpmstate0/;

    my $storage_name = PVE::Storage::parse_volume_id($volid);
    my $scfg = $storecfg->{ids}->{$storage_name};
    if (!defined($scfg)) {
        die "could not find storage '$storage_name'\n";
    }

    my $by_storage_type = $qemu_snap_storage->{$scfg->{type}} && !$scfg->{krbd};
    return 1 if $by_storage_type;

    my $by_image_format = $volid =~ m/\.(qcow2|qed)$/;
    return 1 if $by_image_format;

    return;
}
7133
# Ping the guest agent of VM $vmid ('guest-ping', 3 second timeout).
# Returns 1 if the ping succeeded and 0 otherwise; a failure is reported with
# warn() unless $nowarn is set.
sub qga_check_running {
    my ($vmid, $nowarn) = @_;

    eval { mon_cmd($vmid, "guest-ping", timeout => 3); };
    my $ping_err = $@;

    return 1 if !$ping_err;

    warn "Qemu Guest Agent is not running - $ping_err" if !$nowarn;
    return 0;
}
7144
# Convert the volumes of VM $vmid into base volumes.
#
# Every non-CD-ROM volume of $conf (or only the one named $disk, if given)
# whose storage reports the 'template' feature is converted through
# PVE::Storage::vdisk_create_base(); $conf is updated and written back after
# each converted volume.
sub template_create {
    my ($vmid, $conf, $disk) = @_;

    my $storecfg = PVE::Storage::config();

    my $convert_volume = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive) || ($disk && $ds ne $disk);

        my $src_volid = $drive->{file};
        return if !PVE::Storage::volume_has_feature($storecfg, 'template', $src_volid);

        $drive->{file} = PVE::Storage::vdisk_create_base($storecfg, $src_volid);
        $conf->{$ds} = print_drive($drive);
        PVE::QemuConfig->write_config($vmid, $conf);
    };

    PVE::QemuConfig->foreach_volume($conf, $convert_volume);
}
7165
# Translate an 'iscsi://<portal>/<target>/<lun>' path into the comma separated
# option string (file.driver=iscsi,...,driver=raw) that qemu-img accepts with
# --image-opts / --target-image-opts. The initiator name is taken from
# get_initiator_name().
#
# Dies if $path does not have the expected form.
sub convert_iscsi_path {
    my ($path) = @_;

    if ($path =~ m|^iscsi://([^/]+)/([^/]+)/(.+)$|) {
        my ($portal, $target, $lun) = ($1, $2, $3);

        my $initiator_name = get_initiator_name();

        return "file.driver=iscsi,file.transport=tcp,file.initiator-name=$initiator_name,".
            "file.portal=$portal,file.target=$target,file.lun=$lun,driver=raw";
    }

    die "cannot convert iscsi path '$path', unknown format\n";
}
7182
# Copy a disk image with 'qemu-img convert'.
#
# $src_volid            source volume ID, or the path of an existing plain file
# $dst_volid            destination volume ID; must name a storage. The target
#                       is not created here (qemu-img runs with '-n').
# $size                 total size, only used to render the progress output
#                       (presumably bytes, as it is passed to render_bytes)
# $snapname             optional snapshot of the source to read from
# $is_zero_initialized  if set and the destination is not iSCSI, the target is
#                       addressed through the 'zeroinit:' prefix
#
# Caching is disabled for zfspool sources ('-T none') and zfspool destinations
# ('-t none'). Dies if source or destination cannot be resolved, and with
# "copy failed: ..." if qemu-img itself fails.
sub qemu_img_convert {
    my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized) = @_;

    my $storecfg = PVE::Storage::config();
    my ($src_storeid, $src_volname) = PVE::Storage::parse_volume_id($src_volid, 1);
    my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid, 1);

    die "destination '$dst_volid' is not a valid volid for qemu-img convert\n" if !$dst_storeid;

    my $cachemode;
    my $src_path;
    my $src_is_iscsi = 0;
    my $src_format;

    if ($src_storeid) {
        PVE::Storage::activate_volumes($storecfg, [$src_volid], $snapname);
        my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
        $src_format = qemu_img_format($src_scfg, $src_volname);
        $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
        $src_is_iscsi = ($src_path =~ m|^iscsi://|);
        $cachemode = 'none' if $src_scfg->{type} eq 'zfspool';
    } elsif (-f $src_volid) {
        # plain file: the format can only be guessed from the file extension
        $src_path = $src_volid;
        if ($src_path =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/) {
            $src_format = $1;
        }
    }

    die "source '$src_volid' is not a valid volid nor path for qemu-img convert\n" if !$src_path;

    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
    my $dst_format = qemu_img_format($dst_scfg, $dst_volname);
    my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
    my $dst_is_iscsi = ($dst_path =~ m|^iscsi://|);

    my $cmd = [];
    push @$cmd, '/usr/bin/qemu-img', 'convert', '-p', '-n';
    push @$cmd, '-l', "snapshot.name=$snapname"
        if $snapname && $src_format && $src_format eq "qcow2";
    push @$cmd, '-t', 'none' if $dst_scfg->{type} eq 'zfspool';
    push @$cmd, '-T', $cachemode if defined($cachemode);

    # iSCSI endpoints are passed as option strings instead of plain paths
    if ($src_is_iscsi) {
        push @$cmd, '--image-opts';
        $src_path = convert_iscsi_path($src_path);
    } elsif ($src_format) {
        push @$cmd, '-f', $src_format;
    }

    if ($dst_is_iscsi) {
        push @$cmd, '--target-image-opts';
        $dst_path = convert_iscsi_path($dst_path);
    } else {
        push @$cmd, '-O', $dst_format;
    }

    push @$cmd, $src_path;

    if (!$dst_is_iscsi && $is_zero_initialized) {
        push @$cmd, "zeroinit:$dst_path";
    } else {
        push @$cmd, $dst_path;
    }

    # turn the "(NN.NN/100%)" progress lines of qemu-img into readable output
    my $parser = sub {
        my $line = shift;
        if ($line =~ m/\((\S+)\/100\%\)/) {
            my $percent = $1;
            my $transferred = int($size * $percent / 100);
            my $total_h = render_bytes($size, 1);
            my $transferred_h = render_bytes($transferred, 1);

            print "transferred $transferred_h of $total_h ($percent%)\n";
        }
    };

    eval { run_command($cmd, timeout => undef, outfunc => $parser); };
    my $err = $@;
    die "copy failed: $err" if $err;
}
7264
# Determine the image format of $volname on the storage described by $scfg.
# Only storages with a 'path' setting are inspected: there the file extension
# is returned if it matches QEMU_FORMAT_RE. In every other case the result is
# "raw".
sub qemu_img_format {
    my ($scfg, $volname) = @_;

    return "raw" if !$scfg->{path};

    my ($extension) = $volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/;
    return $extension // "raw";
}
7274
# Start a 'drive-mirror' block job for drive $drive of the running VM $vmid
# and hand over to qemu_drive_mirror_monitor().
#
# $dst_volid is either an 'nbd:' target, which is passed to QEMU verbatim, or
# a volume ID, which is resolved to a path (prefixed with 'zeroinit:' if
# $is_zero_initialized is set). The job is registered in $jobs under
# "drive-$drive". With $src_bitmap the mirror runs incrementally using that
# bitmap; $bwlimit is multiplied by 1024 and passed on as 'speed'.
#
# If starting the job fails, all jobs in $jobs are cancelled and the sub dies.
sub qemu_drive_mirror {
    my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit, $src_bitmap) = @_;

    $jobs = {} if !$jobs;

    my $device = "drive-$drive";
    $jobs->{$device} = {};

    my ($qemu_target, $format);
    if ($dst_volid =~ /^nbd:/) {
        ($qemu_target, $format) = ($dst_volid, "nbd");
    } else {
        my $storecfg = PVE::Storage::config();
        my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid);
        my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

        $format = qemu_img_format($dst_scfg, $dst_volname);

        my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
        $qemu_target = $dst_path;
        $qemu_target = "zeroinit:$dst_path" if $is_zero_initialized;
    }

    my $opts = {
        timeout => 10,
        device => $device,
        mode => "existing",
        sync => "full",
        target => $qemu_target,
    };
    $opts->{format} = $format if $format;

    if (defined($src_bitmap)) {
        $opts->{sync} = 'incremental';
        $opts->{bitmap} = $src_bitmap;
        print "drive mirror re-using dirty bitmap '$src_bitmap'\n";
    }

    if (!defined($bwlimit)) {
        print "drive mirror is starting for drive-$drive\n";
    } else {
        $opts->{speed} = $bwlimit * 1024;
        print "drive mirror is starting for drive-$drive with bandwidth limit: ${bwlimit} KB/s\n";
    }

    # if a job already runs for this device we get an error, catch it for cleanup
    eval { mon_cmd($vmid, "drive-mirror", %$opts); };
    if (my $start_err = $@) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@;
        die "mirroring error: $start_err\n";
    }

    qemu_drive_mirror_monitor($vmid, $vmiddst, $jobs, $completion, $qga);
}
7326
# Poll the block jobs listed in $jobs (keyed by device ID) of VM $vmid once
# per second until they are done, printing their progress.
#
# $completion can be either
# 'complete': wait until all jobs are ready, block-job-complete them (default)
# 'cancel': wait until all jobs are ready, block-job-cancel them
# 'skip': wait until all jobs are ready, return with block jobs in ready state
# 'auto': wait until all jobs disappear, only use for jobs which complete automatically
#
# $op is the block job type to look at (default 'mirror').
#
# If $vmiddst differs from $vmid, the disks are not switched over: the jobs
# are cancelled once all of them are ready. While doing so the guest file
# systems are frozen (if $qga is set and the agent answers), otherwise the VM
# is suspended; this is always undone again, even if cancelling fails.
#
# Finished jobs are removed from $jobs. On any error all remaining jobs are
# cancelled and the sub dies with "block job ($op) error: ...".
sub qemu_drive_mirror_monitor {
    my ($vmid, $vmiddst, $jobs, $completion, $qga, $op) = @_;

    $completion //= 'complete';
    $op //= "mirror";

    eval {
        my $err_complete = 0;

        my $starttime = time();
        while (1) {
            die "block job ('$op') timed out\n" if $err_complete > 300;

            my $stats = mon_cmd($vmid, "query-block-jobs");
            my $ctime = time();

            my $running_jobs = {};
            for my $stat (@$stats) {
                next if $stat->{type} ne $op;
                $running_jobs->{$stat->{device}} = $stat;
            }

            my $readycounter = 0;

            for my $job_id (sort keys %$jobs) {
                my $job = $running_jobs->{$job_id};

                my $vanished = !defined($job);
                my $complete = defined($jobs->{$job_id}->{complete}) && $vanished;
                if ($complete || ($vanished && $completion eq 'auto')) {
                    print "$job_id: $op-job finished\n";
                    delete $jobs->{$job_id};
                    next;
                }

                die "$job_id: '$op' has been cancelled\n" if !defined($job);

                my $busy = $job->{busy};
                my $ready = $job->{ready};
                if (my $total = $job->{len}) {
                    my $transferred = $job->{offset} || 0;
                    my $percent = sprintf "%.2f", ($transferred * 100 / $total);

                    my $duration = $ctime - $starttime;
                    my $total_h = render_bytes($total, 1);
                    my $transferred_h = render_bytes($transferred, 1);

                    my $status = sprintf(
                        "transferred $transferred_h of $total_h ($percent%%) in %s",
                        render_duration($duration),
                    );

                    if ($ready) {
                        if ($busy) {
                            $status .= ", still busy"; # shouldn't even happen? but mirror is weird
                        } else {
                            $status .= ", ready";
                        }
                    }
                    # only log a job until it was seen ready for the first time
                    print "$job_id: $status\n" if !$jobs->{$job_id}->{ready};
                    $jobs->{$job_id}->{ready} = $ready;
                }

                $readycounter++ if $job->{ready};
            }

            last if scalar(keys %$jobs) == 0;

            if ($readycounter == scalar(keys %$jobs)) {
                print "all '$op' jobs are ready\n";

                # do the complete later (or has already been done)
                last if $completion eq 'skip' || $completion eq 'auto';

                if ($vmiddst && $vmiddst != $vmid) {
                    my $agent_running = $qga && qga_check_running($vmid);
                    if ($agent_running) {
                        print "freeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-freeze"); };
                        warn $@ if $@;
                    } else {
                        print "suspend vm\n";
                        eval { PVE::QemuServer::vm_suspend($vmid, 1); };
                        warn $@ if $@;
                    }

                    # if we clone a disk for a new target vm, we don't switch the disk
                    # (errors are re-thrown only after the guest is running again)
                    eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs); };
                    my $cancel_err = $@;

                    if ($agent_running) {
                        print "unfreeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-thaw"); };
                        warn $@ if $@;
                    } else {
                        print "resume vm\n";
                        eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
                        warn $@ if $@;
                    }

                    die $cancel_err if $cancel_err;

                    last;
                } else {

                    for my $job_id (sort keys %$jobs) {
                        # try to switch the disk if source and destination are on the same guest
                        print "$job_id: Completing block job_id...\n";

                        my $completion_cmd;
                        if ($completion eq 'complete') {
                            $completion_cmd = 'block-job-complete';
                        } elsif ($completion eq 'cancel') {
                            $completion_cmd = 'block-job-cancel';
                        } else {
                            die "invalid completion value: $completion\n";
                        }
                        eval { mon_cmd($vmid, $completion_cmd, device => $job_id) };
                        if ($@ =~ m/cannot be completed/) {
                            print "$job_id: block job cannot be completed, trying again.\n";
                            $err_complete++;
                        } else {
                            print "$job_id: Completed successfully.\n";
                            $jobs->{$job_id}->{complete} = 1;
                        }
                    }
                }
            }
            sleep 1;
        }
    };
    my $err = $@;

    if ($err) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        die "block job ($op) error: $err";
    }
}
7464
# Send 'block-job-cancel' for every job in $jobs (keyed by device ID) and then
# poll 'query-block-jobs' once per second until none of them is reported
# anymore. Errors of the cancel command itself are ignored. Jobs that are gone
# get removed from $jobs, so the hash is empty when this sub returns.
sub qemu_blockjobs_cancel {
    my ($vmid, $jobs) = @_;

    for my $job_id (keys %$jobs) {
        print "$job_id: Cancelling block job\n";
        eval { mon_cmd($vmid, "block-job-cancel", device => $job_id); };
        $jobs->{$job_id}->{cancel} = 1;
    }

    while (1) {
        my $stats = mon_cmd($vmid, "query-block-jobs");

        my %still_running = map { ($_->{device} => $_) } @{ $stats // [] };

        for my $job_id (keys %$jobs) {
            next if !defined($jobs->{$job_id}->{cancel});
            next if defined($still_running{$job_id});

            print "$job_id: Done.\n";
            delete $jobs->{$job_id};
        }

        last if !%$jobs;

        sleep 1;
    }
}
7495
# Clone one drive of VM $vmid for VM $newvmid and return the drive description
# (hash) of the clone.
#
# Without $full a linked clone is created with PVE::Storage::vdisk_clone().
# Otherwise a new volume is allocated - on $storage if given, else on the
# storage of the source - and filled either offline with qemu-img, or with a
# drive-mirror block job if the VM is $running and no $snapname is requested.
# Cloud-init drives only get a fresh volume, no data is copied for them.
#
# Every created volume ID is pushed onto $newvollist. $jobs, $completion, $qga
# and $bwlimit are handed on to the drive-mirror helpers, $conf is only used
# to determine the size of 'efidisk0'.
#
# $drive itself is not modified: the result is a deep copy of it, with 'file'
# (and 'size', if it can be determined) updated and 'format' removed.
sub clone_disk {
    my ($storecfg, $vmid, $running, $drivename, $drive, $snapname,
        $newvmid, $storage, $format, $full, $newvollist, $jobs, $completion, $qga, $bwlimit, $conf) = @_;

    my $newvolid;

    if (!$full) {
        print "create linked clone of drive $drivename ($drive->{file})\n";
        $newvolid = PVE::Storage::vdisk_clone($storecfg, $drive->{file}, $newvmid, $snapname);
        push @$newvollist, $newvolid;
    } else {

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        $storeid = $storage if $storage;

        my $dst_format = resolve_dst_disk_format($storecfg, $storeid, $volname, $format);

        print "create full clone of drive $drivename ($drive->{file})\n";
        my $is_cloudinit = drive_is_cloudinit($drive);
        my $name = undef;
        my $size = undef;
        if ($is_cloudinit) {
            $name = "vm-$newvmid-cloudinit";
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            if ($scfg->{path}) {
                $name .= ".$dst_format";
            }
            $snapname = undef;
            $size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE;
        } elsif ($drivename eq 'efidisk0') {
            $size = get_efivars_size($conf);
        } elsif ($drivename eq 'tpmstate0') {
            $size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
        } else {
            ($size) = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
        }
        $newvolid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $newvmid, $dst_format, $name, ($size/1024)
        );
        push @$newvollist, $newvolid;

        PVE::Storage::activate_volumes($storecfg, [$newvolid]);

        if ($is_cloudinit) {
            # when cloning multiple disks (e.g. during clone_vm) it might be the last disk
            # if this is the case, we have to complete any block-jobs still there from
            # previous drive-mirrors
            if (defined($completion) && $completion eq 'complete' && scalar(keys %$jobs) > 0) {
                qemu_drive_mirror_monitor($vmid, $newvmid, $jobs, $completion, $qga);
            }
            # no data to copy for cloud-init drives
        } else {
            my $sparseinit = PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $newvolid);
            if (!$running || $snapname) {
                # TODO: handle bwlimits
                if ($drivename eq 'efidisk0') {
                    # the relevant data on the efidisk may be smaller than the source
                    # e.g. on RBD/ZFS, so we use dd to copy only the amount
                    # that is given by the OVMF_VARS.fd
                    my $src_path = PVE::Storage::path($storecfg, $drive->{file});
                    my $dst_path = PVE::Storage::path($storecfg, $newvolid);

                    # better for Ceph if block size is not too small, see bug #3324
                    my $bs = 1024*1024;

                    run_command(['qemu-img', 'dd', '-n', '-O', $dst_format, "bs=$bs", "osize=$size",
                        "if=$src_path", "of=$dst_path"]);
                } else {
                    qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit);
                }
            } else {

                die "cannot move TPM state while VM is running\n" if $drivename eq 'tpmstate0';

                my $kvmver = get_running_qemu_version($vmid);
                if (!min_version($kvmver, 2, 7)) {
                    die "drive-mirror with iothread requires qemu version 2.7 or higher\n"
                        if $drive->{iothread};
                }

                qemu_drive_mirror($vmid, $drivename, $newvolid, $newvmid, $sparseinit, $jobs,
                    $completion, $qga, $bwlimit);
            }
        }
    }

    # best effort, the size is simply left as is if it cannot be queried
    my ($size) = eval { PVE::Storage::volume_size_info($storecfg, $newvolid, 10) };

    # work on a copy, the caller's drive must keep describing the source volume
    my $disk = dclone($drive);
    delete $disk->{format};
    $disk->{file} = $newvolid;
    $disk->{size} = $size if defined($size);

    return $disk;
}
7591
# Query the QEMU version of the running VM $vmid ('query-version') and return
# it as "<major>.<minor>".
sub get_running_qemu_version {
    my ($vmid) = @_;

    my $version = mon_cmd($vmid, "query-version");
    my $major = $version->{qemu}->{major};
    my $minor = $version->{qemu}->{minor};

    return "$major.$minor";
}
7597
# Decide whether the old (non-EFI) PXE BIOS files have to be used for
# $machine_type.
#
# Returns the list ($use_old_bios_files, $machine_type), where a trailing
# '.pxe' is stripped from the returned machine type. Returns nothing if no
# machine type was passed.
sub qemu_use_old_bios_files {
    my ($machine_type) = @_;

    return if !$machine_type;

    # an explicit '.pxe' suffix always selects the old files
    if ($machine_type =~ m/^(\S+)\.pxe$/) {
        my $stripped_type = $1;
        return (1, $stripped_type);
    }

    # Note: kvm version < 2.4 use non-efi pxe files, and have problems when we
    # load new efi bios files on migration. So this hack is required to allow
    # live migration from qemu-2.2 to qemu-2.4, which is sometimes used when
    # upgrading from proxmox-ve-3.X to proxmox-ve 4.0
    my $version = extract_version($machine_type, kvm_user_version());
    my $use_old_bios_files = !min_version($version, 2, 4);

    return ($use_old_bios_files, $machine_type);
}
7619
# Return the size (as reported by -s) of the OVMF vars image that
# get_ovmf_files() selects for the architecture, EFI disk and machine type of
# $conf. Dies if that image does not exist.
sub get_efivars_size {
    my ($conf) = @_;

    my $arch = get_vm_arch($conf);

    my $efidisk;
    $efidisk = parse_drive('efidisk0', $conf->{efidisk0}) if $conf->{efidisk0};

    my $smm = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # only the second returned element, the vars image, is of interest here
    my $ovmf_vars = (get_ovmf_files($arch, $efidisk, $smm))[1];
    if (! -f $ovmf_vars) {
        die "uefi vars image '$ovmf_vars' not found\n";
    }

    return -s $ovmf_vars;
}
7629
# Set the 'size' property of the 'efidisk0' entry in $conf to the value of
# get_efivars_size(). Does nothing if $conf has no 'efidisk0'.
sub update_efidisk_size {
    my ($conf) = @_;

    if (defined($conf->{efidisk0})) {
        my $efidisk = PVE::QemuServer::parse_drive('efidisk0', $conf->{efidisk0});
        $efidisk->{size} = get_efivars_size($conf);
        $conf->{efidisk0} = print_drive($efidisk);
    }

    return;
}
7641
# Set the 'size' property of the 'tpmstate0' entry in $conf to
# TPMSTATE_DISK_SIZE. Does nothing if $conf has no 'tpmstate0', mirroring
# update_efidisk_size().
sub update_tpmstate_size {
    my ($conf) = @_;

    # without this guard an entry would be created out of thin air
    return if !defined($conf->{tpmstate0});

    my $disk = PVE::QemuServer::parse_drive('tpmstate0', $conf->{tpmstate0});
    $disk->{size} = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
    $conf->{tpmstate0} = print_drive($disk);

    return;
}
7649
# Allocate a new EFI vars disk for VM $vmid on storage $storeid in format $fmt
# and initialize it from the OVMF vars image that get_ovmf_files() selects for
# $arch, $efidisk and $smm.
#
# Returns the list ($volid, $size), where $size is the size reported by
# volume_size_info() divided by 1024. Dies if the vars image is missing.
sub create_efidisk($$$$$$$) {
    my ($storecfg, $storeid, $vmid, $fmt, $arch, $efidisk, $smm) = @_;

    my $ovmf_vars = (get_ovmf_files($arch, $efidisk, $smm))[1];
    if (! -f $ovmf_vars) {
        die "EFI vars default image not found\n";
    }

    my $vars_bytes = -s $ovmf_vars;
    my $vars_kb = PVE::Tools::convert_size($vars_bytes, 'b' => 'kb');

    my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $fmt, undef, $vars_kb);
    PVE::Storage::activate_volumes($storecfg, [$volid]);

    # copy the pristine vars image into the new volume
    qemu_img_convert($ovmf_vars, $volid, $vars_bytes, undef, 0);

    my ($allocated_size) = PVE::Storage::volume_size_info($storecfg, $volid, 3);

    return ($volid, $allocated_size/1024);
}
7666
# Query the IO threads of VM $vmid ('query-iothreads') and return a hash
# reference mapping each 'id' to its 'thread-id'.
sub vm_iothreads_list {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, 'query-iothreads');

    my %thread_id_of = map { ($_->{id} => $_->{"thread-id"}) } @{ $res // [] };

    return \%thread_id_of;
}
7679
# Compute the SCSI controller placement for $drive according to the 'scsihw'
# setting of $conf.
#
# Returns the list ($maxdev, $controller, $controller_prefix): the number of
# devices per controller (7 for unset/lsi* controllers, 1 for
# 'virtio-scsi-single', 256 otherwise), the controller number derived from the
# drive index, and the controller ID prefix.
sub scsihw_infos {
    my ($conf, $drive) = @_;

    my $scsihw = $conf->{scsihw};
    my $is_single = $scsihw && $scsihw eq 'virtio-scsi-single';

    my $maxdev;
    if (!$scsihw || $scsihw =~ m/^lsi/) {
        $maxdev = 7;
    } else {
        $maxdev = $is_single ? 1 : 256;
    }

    my $controller = int($drive->{index} / $maxdev);
    my $controller_prefix = $is_single ? "virtioscsi" : "scsihw";

    return ($maxdev, $controller, $controller_prefix);
}
7700
# Map an 'ostype' value to a Windows version number: 5 for w2k/wxp/w2k3, 6 for
# w2k8/wvista and N for 'winN'. Returns 0 for everything else, including an
# unset ostype.
sub windows_version {
    my ($ostype) = @_;

    return 0 if !$ostype;

    my %legacy_version = (
        w2k => 5,
        wxp => 5,
        w2k3 => 5,
        w2k8 => 6,
        wvista => 6,
    );
    return $legacy_version{$ostype} if exists($legacy_version{$ostype});

    return $1 if $ostype =~ m/^win(\d+)$/;

    return 0;
}
7718
# Pick the image format for a new volume on storage $storeid.
#
# An explicitly requested $format wins; without one the format of
# $src_volname (as determined by qemu_img_format() against the configuration
# of $storeid) serves as hint. If the resulting format is not among the valid
# formats of the storage, or there is neither a format nor a source volume
# name, the default format of the storage is returned.
sub resolve_dst_disk_format {
    my ($storecfg, $storeid, $src_volname, $format) = @_;
    my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);

    if (!$format) {
        return $defFormat if !$src_volname;

        # if no target format is specified, use the source disk format as hint
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        $format = qemu_img_format($scfg, $src_volname);
    }

    # test if requested format is supported - else use default
    my $supported = grep { $_ eq $format } @$validFormats;

    return $supported ? $format : $defFormat;
}
7738
# NOTE: if this logic changes, please update docs & possibly gui logic
#
# Select the storage for the VM state volume, in this order of preference:
#  1. the 'vmstatestorage' set in $conf
#  2. a shared storage on which the VM has at least one disk
#  3. a non-shared storage on which the VM has at least one disk
#  4. the storage named 'local'
# Within 2. and 3. a storage with a 'path' setting (file based) is preferred.
sub find_vmstate_storage {
    my ($conf, $storecfg) = @_;

    return $conf->{vmstatestorage} if $conf->{vmstatestorage};

    my %candidate; # keys: 'shared' and 'local'

    foreach_storage_used_by_vm($conf, sub {
        my ($sid) = @_;
        my $scfg = PVE::Storage::storage_config($storecfg, $sid);
        my $kind = $scfg->{shared} ? 'shared' : 'local';
        # prefer file based storage
        $candidate{$kind} = $sid if !$candidate{$kind} || $scfg->{path};
    });

    return $candidate{shared} // $candidate{local} // 'local';
}
7762
# Generate a new UUID with the UUID module and return its string form.
sub generate_uuid {
    my $raw_uuid;
    UUID::generate($raw_uuid);

    my $uuid_text;
    UUID::unparse($raw_uuid, $uuid_text);

    return $uuid_text;
}
7769
# Return a freshly generated UUID as "uuid=<uuid>" property string.
sub generate_smbios1_uuid {
    my $uuid = generate_uuid();
    return "uuid=$uuid";
}
7773
# Send 'nbd-server-stop' to VM $vmid and return the result of the command.
sub nbd_stop {
    my ($vmid) = @_;

    return mon_cmd($vmid, 'nbd-server-stop');
}
7779
# Create the (empty) reboot trigger file /run/qemu-server/$vmid.reboot.
# Dies if the file cannot be opened for writing.
sub create_reboot_request {
    my ($vmid) = @_;

    my $trigger = "/run/qemu-server/$vmid.reboot";

    open(my $fh, '>', $trigger)
        or die "failed to create reboot trigger file: $!\n";

    return close($fh);
}
7786
# Remove the reboot trigger file /run/qemu-server/$vmid.reboot.
# Returns the result of unlink(), i.e. a false value if there was no such
# file. Dies if removing the file failed for any other reason.
sub clear_reboot_request {
    my ($vmid) = @_;

    my $removed = unlink("/run/qemu-server/$vmid.reboot");

    if (!$removed && $! != POSIX::ENOENT) {
        die "could not remove reboot request for $vmid: $!";
    }

    return $removed;
}
7798
# Translate a legacy boot specification (a string of single-letter device
# classes, taken from $bootcfg->{legacy} or the schema default) into a hash
# mapping device names of $conf to boot indices.
#
# The n-th letter gets the base index n*100. CD-ROM drives consume the indices
# of 'd' in volume iteration order, network devices those of 'n'. For 'c' only
# the volume configured as 'bootdisk' gets an entry, although every non-CD-ROM
# volume advances the index.
sub bootorder_from_legacy {
    my ($conf, $bootcfg) = @_;

    my $legacy = $bootcfg->{legacy} || $boot_fmt->{legacy}->{default};

    my %next_index;
    my $position = 0;
    for my $letter (split(//, $legacy)) {
        $position++;
        $next_index{$letter} = $position * 100;
    }

    my $bootorder = {};

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        if (drive_is_cdrom($drive, 1)) {
            return if !$next_index{d};
            $bootorder->{$ds} = $next_index{d}++;
        } elsif ($next_index{c}) {
            my $is_bootdisk = $conf->{bootdisk} && $conf->{bootdisk} eq $ds;
            $bootorder->{$ds} = $next_index{c} if $is_bootdisk;
            $next_index{c}++;
        }
    });

    if ($next_index{n}) {
        for my $i (0 .. $MAX_NETS - 1) {
            my $netname = "net$i";
            next if !$conf->{$netname};
            $bootorder->{$netname} = $next_index{n}++;
        }
    }

    return $bootorder;
}
7838
# Generate default device list for 'boot: order=' property. Matches legacy
# default boot order, but with explicit device names. This is important, since
# the fallback for when neither 'order' nor the old format is specified relies
# on 'bootorder_from_legacy' above, and it would be confusing if this diverges.
#
# Returns an array reference holding, as far as present in $conf: the first
# hard disk, the first CD-ROM and the first network device.
sub get_default_bootdevices {
    my ($conf) = @_;

    my @ret = ();

    # harddisk (0) first, then cdrom (1)
    for my $cdrom (0, 1) {
        my $first = PVE::QemuServer::Drive::resolve_first_disk($conf, $cdrom);
        push @ret, $first if $first;
    }

    # network
    my ($first_net) = grep { $conf->{$_} } map { "net$_" } (0 .. $MAX_NETS - 1);
    push @ret, $first_net if defined($first_net);

    return \@ret;
}
7866
# Return a hash mapping device names to boot indices for $conf.
#
# Without a 'boot' property, or if it uses the legacy format, the result comes
# from bootorder_from_legacy(). With 'order=' the listed devices are numbered
# consecutively. If 'boot' has neither, the result is empty.
sub device_bootorder {
    my ($conf) = @_;

    return bootorder_from_legacy($conf) if !defined($conf->{boot});

    my $boot = parse_property_string($boot_fmt, $conf->{boot});

    return bootorder_from_legacy($conf, $boot) if !defined($boot) || $boot->{legacy};

    my $bootorder = {};
    if ($boot->{order}) {
        my $index = 100; # start at 100 to allow user to insert devices before us with -args
        $bootorder->{$_} = $index++ for PVE::Tools::split_list($boot->{order});
    }

    return $bootorder;
}
7886
# Connect to the qmeventd socket and send the vzdump handshake for $vmid.
#
# The connection is attempted up to five times, 25ms apart, as long as the
# failure is EINTR or EAGAIN; any other error is fatal right away. Returns the
# socket handle, which the caller has to close once the registration is no
# longer needed.
sub register_qmeventd_handle {
    my ($vmid) = @_;

    my $peer = "/var/run/qmeventd.sock";
    my $attempts = 0;
    my $fh;

    while (1) {
        $attempts++;
        $fh = IO::Socket::UNIX->new(Peer => $peer, Blocking => 0, Timeout => 1);
        last if $fh;

        die "unable to connect to qmeventd socket (vmid: $vmid) - $!\n"
            if $! != EINTR && $! != EAGAIN;

        die "unable to connect to qmeventd socket (vmid: $vmid) - timeout "
            . "after $attempts retries\n"
            if $attempts > 4;

        usleep(25000);
    }

    # send handshake to mark VM as backing up
    print $fh to_json({vzdump => {vmid => "$vmid"}});

    # return handle to be closed later when inhibit is no longer required
    return $fh;
}
7914
7915 # bash completion helper
7916
# Completion helper: return an array reference with the volume IDs of all
# backups in 'vma.<compressor>' format. If the current value $cvalue starts
# with '<storeid>:', only that storage is queried.
sub complete_backup_archives {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    my ($storeid) = $cvalue =~ m/^([^:]+):/;

    my $data = PVE::Storage::template_list($cfg, $storeid, 'backup');

    my $res = [];
    for my $items (values %$data) {
        for my $item (@$items) {
            next if $item->{format} !~ m/^vma\.(${\PVE::Storage::Plugin::COMPRESSOR_RE})$/;
            next if !defined($item->{volid});
            push @$res, $item->{volid};
        }
    }

    return $res;
}
7940
# Completion helper: return an array reference with the IDs from vmstatus().
# If $running is defined, templates are left out and only VMs whose status
# matches are listed: running ones for a true value, all others for a false
# value.
my $complete_vmid_full = sub {
    my ($running) = @_;

    my $idlist = vmstatus();

    my $res = [];

    for my $id (keys %$idlist) {
        my $d = $idlist->{$id};

        if (defined($running)) {
            next if $d->{template};

            my $is_running = $d->{status} eq 'running';
            next if $running ? !$is_running : $is_running;
        }

        push @$res, $id;
    }

    return $res;
};
7960
# Completion helper: all VM IDs.
sub complete_vmid {
    return $complete_vmid_full->();
}
7964
# Completion helper: IDs of all non-template VMs that are not running.
sub complete_vmid_stopped {
    return $complete_vmid_full->(0);
}
7968
# Completion helper: IDs of all non-template VMs that are running.
sub complete_vmid_running {
    return $complete_vmid_full->(1);
}
7972
# Completion helper: return an array reference with the IDs of all storages
# that pass storage_check_enabled() and have the 'images' content type.
sub complete_storage {

    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @usable = grep {
        PVE::Storage::storage_check_enabled($cfg, $_, undef, 1)
            && $ids->{$_}->{content}->{images}
    } keys %$ids;

    return \@usable;
}
7987
# Completion helper: like complete_storage(), but storage_check_enabled() is
# asked about the target node, which is taken from the second element of
# $all_args.
sub complete_migration_storage {
    my ($cmd, $param, $current_value, $all_args) = @_;

    my $targetnode = @$all_args[1];

    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @usable = grep {
        PVE::Storage::storage_check_enabled($cfg, $_, $targetnode, 1)
            && $ids->{$_}->{content}->{images}
    } keys %$ids;

    return \@usable;
}
8005
# Returns a true value if 'query-status' reports VM $vmid as "paused".
# Errors (missing config on this node, failing monitor command) are only
# warned about and lead to a false result.
sub vm_is_paused {
    my ($vmid) = @_;

    my $qmpstatus;
    eval {
        PVE::QemuConfig::assert_config_exists_on_node($vmid);
        $qmpstatus = mon_cmd($vmid, "query-status");
    };
    if ($@) {
        warn "$@\n";
    }

    return $qmpstatus && $qmpstatus->{status} eq "paused";
}
8015
# Assert that the storage of volume $vol is configured for the content type of
# that volume (as determined by PVE::Storage::parse_volname()).
# Returns 1 on success and dies otherwise.
sub check_volume_storage_type {
    my ($storecfg, $vol) = @_;

    my ($storeid) = PVE::Storage::parse_volume_id($vol);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
    my ($vtype) = PVE::Storage::parse_volname($storecfg, $vol);

    if (!$scfg->{content}->{$vtype}) {
        die "storage '$storeid' does not support content-type '$vtype'\n";
    }

    return 1;
}
8028
8029 1;