]> git.proxmox.com Git - qemu-server.git/blob - PVE/QemuServer.pm
use non SMM ovmf code file for i440fx machines
[qemu-server.git] / PVE / QemuServer.pm
1 package PVE::QemuServer;
2
3 use strict;
4 use warnings;
5
6 use Cwd 'abs_path';
7 use Digest::SHA;
8 use Fcntl ':flock';
9 use Fcntl;
10 use File::Basename;
11 use File::Copy qw(copy);
12 use File::Path;
13 use File::stat;
14 use Getopt::Long;
15 use IO::Dir;
16 use IO::File;
17 use IO::Handle;
18 use IO::Select;
19 use IO::Socket::UNIX;
20 use IPC::Open3;
21 use JSON;
22 use MIME::Base64;
23 use POSIX;
24 use Storable qw(dclone);
25 use Time::HiRes qw(gettimeofday usleep);
26 use URI::Escape;
27 use UUID;
28
29 use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file);
30 use PVE::CGroup;
31 use PVE::DataCenterConfig;
32 use PVE::Exception qw(raise raise_param_exc);
33 use PVE::Format qw(render_duration render_bytes);
34 use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
35 use PVE::INotify;
36 use PVE::JSONSchema qw(get_standard_option parse_property_string);
37 use PVE::ProcFSTools;
38 use PVE::PBSClient;
39 use PVE::RPCEnvironment;
40 use PVE::Storage;
41 use PVE::SysFSTools;
42 use PVE::Systemd;
43 use PVE::Tools qw(run_command file_read_firstline file_get_contents dir_glob_foreach get_host_arch $IPV6RE);
44
45 use PVE::QMPClient;
46 use PVE::QemuConfig;
47 use PVE::QemuServer::Helpers qw(min_version config_aware_timeout);
48 use PVE::QemuServer::Cloudinit;
49 use PVE::QemuServer::CGroup;
50 use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options);
51 use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom drive_is_read_only parse_drive print_drive);
52 use PVE::QemuServer::Machine;
53 use PVE::QemuServer::Memory;
54 use PVE::QemuServer::Monitor qw(mon_cmd);
55 use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
56 use PVE::QemuServer::USB qw(parse_usb_device);
57
58 my $have_sdn;
59 eval {
60 require PVE::Network::SDN::Zones;
61 $have_sdn = 1;
62 };
63
64 my $EDK2_FW_BASE = '/usr/share/pve-edk2-firmware/';
65 my $OVMF = {
66 x86_64 => {
67 '4m-no-smm' => [
68 "$EDK2_FW_BASE/OVMF_CODE_4M.fd",
69 "$EDK2_FW_BASE/OVMF_VARS_4M.fd",
70 ],
71 '4m-no-smm-ms' => [
72 "$EDK2_FW_BASE/OVMF_CODE_4M.fd",
73 "$EDK2_FW_BASE/OVMF_VARS_4M.ms.fd",
74 ],
75 '4m' => [
76 "$EDK2_FW_BASE/OVMF_CODE_4M.secboot.fd",
77 "$EDK2_FW_BASE/OVMF_VARS_4M.fd",
78 ],
79 '4m-ms' => [
80 "$EDK2_FW_BASE/OVMF_CODE_4M.secboot.fd",
81 "$EDK2_FW_BASE/OVMF_VARS_4M.ms.fd",
82 ],
83 default => [
84 "$EDK2_FW_BASE/OVMF_CODE.fd",
85 "$EDK2_FW_BASE/OVMF_VARS.fd",
86 ],
87 },
88 aarch64 => {
89 default => [
90 "$EDK2_FW_BASE/AAVMF_CODE.fd",
91 "$EDK2_FW_BASE/AAVMF_VARS.fd",
92 ],
93 },
94 };
95
96 my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
97
# Note about locking: we use flock on the config file to protect against concurrent actions.
# Additionally, we have a 'lock' setting in the config file. This can be set to 'migrate',
# 'backup', 'snapshot' or 'rollback'. Most actions are not allowed when such a lock is set.
# But you can ignore this kind of lock with the --skiplock flag.
102
103 cfs_register_file('/qemu-server/',
104 \&parse_vm_config,
105 \&write_vm_config);
106
107 PVE::JSONSchema::register_standard_option('pve-qm-stateuri', {
108 description => "Some command save/restore state from this location.",
109 type => 'string',
110 maxLength => 128,
111 optional => 1,
112 });
113
114 PVE::JSONSchema::register_standard_option('pve-qemu-machine', {
115 description => "Specifies the Qemu machine type.",
116 type => 'string',
117 pattern => '(pc|pc(-i440fx)?-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|q35|pc-q35-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|virt(?:-\d+(\.\d+)+)?(\+pve\d+)?)',
118 maxLength => 40,
119 optional => 1,
120 });
121
122
# Translate a source storage ID into its target storage ID.
#
# $map    - optional hash reference; its 'entries' hash holds explicit
#           source => target pairs and 'default' holds a catch-all target.
# $source - the storage ID to translate.
#
# Lookup order: explicit entry, then the map's default, then $source itself.
# Without a map, $source is returned unchanged.
sub map_storage {
    my ($map, $source) = @_;

    if (defined($map)) {
        # an explicit per-storage entry wins over everything else
        if (my $entries = $map->{entries}) {
            my $target = $entries->{$source};
            return $target if defined($target);
        }

        # otherwise use the catch-all target, if one is set
        my $fallback = $map->{default};
        return $fallback if $fallback;
    }

    # identity (fallback)
    return $source;
}
136
137 PVE::JSONSchema::register_standard_option('pve-targetstorage', {
138 description => "Mapping from source to target storages. Providing only a single storage ID maps all source storages to that storage. Providing the special value '1' will map each source storage to itself.",
139 type => 'string',
140 format => 'storagepair-list',
141 optional => 1,
142 });
143
144 #no warnings 'redefine';
145
# Holds the node name once it has been looked up.
my $nodename_cache;

# Return the node name as reported by PVE::INotify::nodename().
# The lookup is done on the first call only; later calls return the
# cached value.
sub nodename {
    if (!defined($nodename_cache)) {
        $nodename_cache = PVE::INotify::nodename();
    }
    return $nodename_cache;
}
151
152 my $watchdog_fmt = {
153 model => {
154 default_key => 1,
155 type => 'string',
156 enum => [qw(i6300esb ib700)],
157 description => "Watchdog type to emulate.",
158 default => 'i6300esb',
159 optional => 1,
160 },
161 action => {
162 type => 'string',
163 enum => [qw(reset shutdown poweroff pause debug none)],
164 description => "The action to perform if after activation the guest fails to poll the watchdog in time.",
165 optional => 1,
166 },
167 };
168 PVE::JSONSchema::register_format('pve-qm-watchdog', $watchdog_fmt);
169
170 my $agent_fmt = {
171 enabled => {
172 description => "Enable/disable communication with a Qemu Guest Agent (QGA) running in the VM.",
173 type => 'boolean',
174 default => 0,
175 default_key => 1,
176 },
177 fstrim_cloned_disks => {
178 description => "Run fstrim after moving a disk or migrating the VM.",
179 type => 'boolean',
180 optional => 1,
181 default => 0
182 },
183 type => {
184 description => "Select the agent type",
185 type => 'string',
186 default => 'virtio',
187 optional => 1,
188 enum => [qw(virtio isa)],
189 },
190 };
191
192 my $vga_fmt = {
193 type => {
194 description => "Select the VGA type.",
195 type => 'string',
196 default => 'std',
197 optional => 1,
198 default_key => 1,
199 enum => [qw(cirrus qxl qxl2 qxl3 qxl4 none serial0 serial1 serial2 serial3 std virtio vmware)],
200 },
201 memory => {
202 description => "Sets the VGA memory (in MiB). Has no effect with serial display.",
203 type => 'integer',
204 optional => 1,
205 minimum => 4,
206 maximum => 512,
207 },
208 };
209
210 my $ivshmem_fmt = {
211 size => {
212 type => 'integer',
213 minimum => 1,
214 description => "The size of the file in MB.",
215 },
216 name => {
217 type => 'string',
218 pattern => '[a-zA-Z0-9\-]+',
219 optional => 1,
220 format_description => 'string',
221 description => "The name of the file. Will be prefixed with 'pve-shm-'. Default is the VMID. Will be deleted when the VM is stopped.",
222 },
223 };
224
225 my $audio_fmt = {
226 device => {
227 type => 'string',
228 enum => [qw(ich9-intel-hda intel-hda AC97)],
229 description => "Configure an audio device."
230 },
231 driver => {
232 type => 'string',
233 enum => ['spice', 'none'],
234 default => 'spice',
235 optional => 1,
236 description => "Driver backend for the audio device."
237 },
238 };
239
240 my $spice_enhancements_fmt = {
241 foldersharing => {
242 type => 'boolean',
243 optional => 1,
244 default => '0',
245 description => "Enable folder sharing via SPICE. Needs Spice-WebDAV daemon installed in the VM."
246 },
247 videostreaming => {
248 type => 'string',
249 enum => ['off', 'all', 'filter'],
250 default => 'off',
251 optional => 1,
252 description => "Enable video streaming. Uses compression for detected video streams."
253 },
254 };
255
256 my $rng_fmt = {
257 source => {
258 type => 'string',
259 enum => ['/dev/urandom', '/dev/random', '/dev/hwrng'],
260 default_key => 1,
261 description => "The file on the host to gather entropy from. In most cases '/dev/urandom'"
262 ." should be preferred over '/dev/random' to avoid entropy-starvation issues on the"
263 ." host. Using urandom does *not* decrease security in any meaningful way, as it's"
264 ." still seeded from real entropy, and the bytes provided will most likely be mixed"
265 ." with real entropy on the guest as well. '/dev/hwrng' can be used to pass through"
266 ." a hardware RNG from the host.",
267 },
268 max_bytes => {
269 type => 'integer',
270 description => "Maximum bytes of entropy allowed to get injected into the guest every"
271 ." 'period' milliseconds. Prefer a lower value when using '/dev/random' as source. Use"
272 ." `0` to disable limiting (potentially dangerous!).",
273 optional => 1,
274
275 # default is 1 KiB/s, provides enough entropy to the guest to avoid boot-starvation issues
276 # (e.g. systemd etc...) while allowing no chance of overwhelming the host, provided we're
277 # reading from /dev/urandom
278 default => 1024,
279 },
280 period => {
281 type => 'integer',
282 description => "Every 'period' milliseconds the entropy-injection quota is reset, allowing"
283 ." the guest to retrieve another 'max_bytes' of entropy.",
284 optional => 1,
285 default => 1000,
286 },
287 };
288
289 my $confdesc = {
290 onboot => {
291 optional => 1,
292 type => 'boolean',
293 description => "Specifies whether a VM will be started during system bootup.",
294 default => 0,
295 },
296 autostart => {
297 optional => 1,
298 type => 'boolean',
299 description => "Automatic restart after crash (currently ignored).",
300 default => 0,
301 },
302 hotplug => {
303 optional => 1,
304 type => 'string', format => 'pve-hotplug-features',
305 description => "Selectively enable hotplug features. This is a comma separated list of"
306 ." hotplug features: 'network', 'disk', 'cpu', 'memory' and 'usb'. Use '0' to disable"
307 ." hotplug completely. Using '1' as value is an alias for the default `network,disk,usb`.",
308 default => 'network,disk,usb',
309 },
310 reboot => {
311 optional => 1,
312 type => 'boolean',
313 description => "Allow reboot. If set to '0' the VM exit on reboot.",
314 default => 1,
315 },
316 lock => {
317 optional => 1,
318 type => 'string',
319 description => "Lock/unlock the VM.",
320 enum => [qw(backup clone create migrate rollback snapshot snapshot-delete suspending suspended)],
321 },
322 cpulimit => {
323 optional => 1,
324 type => 'number',
325 description => "Limit of CPU usage.",
326 verbose_description => "Limit of CPU usage.\n\nNOTE: If the computer has 2 CPUs, it has"
327 ." total of '2' CPU time. Value '0' indicates no CPU limit.",
328 minimum => 0,
329 maximum => 128,
330 default => 0,
331 },
332 cpuunits => {
333 optional => 1,
334 type => 'integer',
335 description => "CPU weight for a VM, will be clamped to [1, 10000] in cgroup v2.",
336 verbose_description => "CPU weight for a VM. Argument is used in the kernel fair scheduler."
337 ." The larger the number is, the more CPU time this VM gets. Number is relative to"
338 ." weights of all the other running VMs.",
339 minimum => 2,
340 maximum => 262144,
341 default => 'cgroup v1: 1024, cgroup v2: 100',
342 },
343 memory => {
344 optional => 1,
345 type => 'integer',
346 description => "Amount of RAM for the VM in MB. This is the maximum available memory when"
347 ." you use the balloon device.",
348 minimum => 16,
349 default => 512,
350 },
351 balloon => {
352 optional => 1,
353 type => 'integer',
354 description => "Amount of target RAM for the VM in MB. Using zero disables the ballon driver.",
355 minimum => 0,
356 },
357 shares => {
358 optional => 1,
359 type => 'integer',
360 description => "Amount of memory shares for auto-ballooning. The larger the number is, the"
361 ." more memory this VM gets. Number is relative to weights of all other running VMs."
362 ." Using zero disables auto-ballooning. Auto-ballooning is done by pvestatd.",
363 minimum => 0,
364 maximum => 50000,
365 default => 1000,
366 },
367 keyboard => {
368 optional => 1,
369 type => 'string',
370 description => "Keyboard layout for VNC server. The default is read from the"
371 ."'/etc/pve/datacenter.cfg' configuration file. It should not be necessary to set it.",
372 enum => PVE::Tools::kvmkeymaplist(),
373 default => undef,
374 },
375 name => {
376 optional => 1,
377 type => 'string', format => 'dns-name',
378 description => "Set a name for the VM. Only used on the configuration web interface.",
379 },
380 scsihw => {
381 optional => 1,
382 type => 'string',
383 description => "SCSI controller model",
384 enum => [qw(lsi lsi53c810 virtio-scsi-pci virtio-scsi-single megasas pvscsi)],
385 default => 'lsi',
386 },
387 description => {
388 optional => 1,
389 type => 'string',
390 description => "Description for the VM. Shown in the web-interface VM's summary."
391 ." This is saved as comment inside the configuration file.",
392 maxLength => 1024 * 8,
393 },
394 ostype => {
395 optional => 1,
396 type => 'string',
397 enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 win10 win11 l24 l26 solaris)],
398 description => "Specify guest operating system.",
399 verbose_description => <<EODESC,
400 Specify guest operating system. This is used to enable special
401 optimization/features for specific operating systems:
402
403 [horizontal]
404 other;; unspecified OS
405 wxp;; Microsoft Windows XP
406 w2k;; Microsoft Windows 2000
407 w2k3;; Microsoft Windows 2003
408 w2k8;; Microsoft Windows 2008
409 wvista;; Microsoft Windows Vista
410 win7;; Microsoft Windows 7
411 win8;; Microsoft Windows 8/2012/2012r2
412 win10;; Microsoft Windows 10/2016/2019
413 win11;; Microsoft Windows 11/2022
414 l24;; Linux 2.4 Kernel
415 l26;; Linux 2.6 - 5.X Kernel
416 solaris;; Solaris/OpenSolaris/OpenIndiania kernel
417 EODESC
418 },
419 boot => {
420 optional => 1,
421 type => 'string', format => 'pve-qm-boot',
422 description => "Specify guest boot order. Use the 'order=' sub-property as usage with no"
423 ." key or 'legacy=' is deprecated.",
424 },
425 bootdisk => {
426 optional => 1,
427 type => 'string', format => 'pve-qm-bootdisk',
428 description => "Enable booting from specified disk. Deprecated: Use 'boot: order=foo;bar' instead.",
429 pattern => '(ide|sata|scsi|virtio)\d+',
430 },
431 smp => {
432 optional => 1,
433 type => 'integer',
434 description => "The number of CPUs. Please use option -sockets instead.",
435 minimum => 1,
436 default => 1,
437 },
438 sockets => {
439 optional => 1,
440 type => 'integer',
441 description => "The number of CPU sockets.",
442 minimum => 1,
443 default => 1,
444 },
445 cores => {
446 optional => 1,
447 type => 'integer',
448 description => "The number of cores per socket.",
449 minimum => 1,
450 default => 1,
451 },
452 numa => {
453 optional => 1,
454 type => 'boolean',
455 description => "Enable/disable NUMA.",
456 default => 0,
457 },
458 hugepages => {
459 optional => 1,
460 type => 'string',
461 description => "Enable/disable hugepages memory.",
462 enum => [qw(any 2 1024)],
463 },
464 keephugepages => {
465 optional => 1,
466 type => 'boolean',
467 default => 0,
468 description => "Use together with hugepages. If enabled, hugepages will not not be deleted"
469 ." after VM shutdown and can be used for subsequent starts.",
470 },
471 vcpus => {
472 optional => 1,
473 type => 'integer',
474 description => "Number of hotplugged vcpus.",
475 minimum => 1,
476 default => 0,
477 },
478 acpi => {
479 optional => 1,
480 type => 'boolean',
481 description => "Enable/disable ACPI.",
482 default => 1,
483 },
484 agent => {
485 optional => 1,
486 description => "Enable/disable communication with the Qemu Guest Agent and its properties.",
487 type => 'string',
488 format => $agent_fmt,
489 },
490 kvm => {
491 optional => 1,
492 type => 'boolean',
493 description => "Enable/disable KVM hardware virtualization.",
494 default => 1,
495 },
496 tdf => {
497 optional => 1,
498 type => 'boolean',
499 description => "Enable/disable time drift fix.",
500 default => 0,
501 },
502 localtime => {
503 optional => 1,
504 type => 'boolean',
505 description => "Set the real time clock (RTC) to local time. This is enabled by default if"
506 ." the `ostype` indicates a Microsoft Windows OS.",
507 },
508 freeze => {
509 optional => 1,
510 type => 'boolean',
511 description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
512 },
513 vga => {
514 optional => 1,
515 type => 'string', format => $vga_fmt,
516 description => "Configure the VGA hardware.",
517 verbose_description => "Configure the VGA Hardware. If you want to use high resolution"
518 ." modes (>= 1280x1024x16) you may need to increase the vga memory option. Since QEMU"
519 ." 2.9 the default VGA display type is 'std' for all OS types besides some Windows"
520 ." versions (XP and older) which use 'cirrus'. The 'qxl' option enables the SPICE"
521 ." display server. For win* OS you can select how many independent displays you want,"
522 ." Linux guests can add displays them self.\nYou can also run without any graphic card,"
523 ." using a serial device as terminal.",
524 },
525 watchdog => {
526 optional => 1,
527 type => 'string', format => 'pve-qm-watchdog',
528 description => "Create a virtual hardware watchdog device.",
529 verbose_description => "Create a virtual hardware watchdog device. Once enabled (by a guest"
530 ." action), the watchdog must be periodically polled by an agent inside the guest or"
531 ." else the watchdog will reset the guest (or execute the respective action specified)",
532 },
533 startdate => {
534 optional => 1,
535 type => 'string',
536 typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
537 description => "Set the initial date of the real time clock. Valid format for date are:"
538 ."'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
539 pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
540 default => 'now',
541 },
542 startup => get_standard_option('pve-startup-order'),
543 template => {
544 optional => 1,
545 type => 'boolean',
546 description => "Enable/disable Template.",
547 default => 0,
548 },
549 args => {
550 optional => 1,
551 type => 'string',
552 description => "Arbitrary arguments passed to kvm.",
553 verbose_description => <<EODESCR,
554 Arbitrary arguments passed to kvm, for example:
555
556 args: -no-reboot -no-hpet
557
558 NOTE: this option is for experts only.
559 EODESCR
560 },
561 tablet => {
562 optional => 1,
563 type => 'boolean',
564 default => 1,
565 description => "Enable/disable the USB tablet device.",
566 verbose_description => "Enable/disable the USB tablet device. This device is usually needed"
567 ." to allow absolute mouse positioning with VNC. Else the mouse runs out of sync with"
568 ." normal VNC clients. If you're running lots of console-only guests on one host, you"
569 ." may consider disabling this to save some context switches. This is turned off by"
570 ." default if you use spice (`qm set <vmid> --vga qxl`).",
571 },
572 migrate_speed => {
573 optional => 1,
574 type => 'integer',
575 description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
576 minimum => 0,
577 default => 0,
578 },
579 migrate_downtime => {
580 optional => 1,
581 type => 'number',
582 description => "Set maximum tolerated downtime (in seconds) for migrations.",
583 minimum => 0,
584 default => 0.1,
585 },
586 cdrom => {
587 optional => 1,
588 type => 'string', format => 'pve-qm-ide',
589 typetext => '<volume>',
590 description => "This is an alias for option -ide2",
591 },
592 cpu => {
593 optional => 1,
594 description => "Emulated CPU type.",
595 type => 'string',
596 format => 'pve-vm-cpu-conf',
597 },
598 parent => get_standard_option('pve-snapshot-name', {
599 optional => 1,
600 description => "Parent snapshot name. This is used internally, and should not be modified.",
601 }),
602 snaptime => {
603 optional => 1,
604 description => "Timestamp for snapshots.",
605 type => 'integer',
606 minimum => 0,
607 },
608 vmstate => {
609 optional => 1,
610 type => 'string', format => 'pve-volume-id',
611 description => "Reference to a volume which stores the VM state. This is used internally"
612 ." for snapshots.",
613 },
614 vmstatestorage => get_standard_option('pve-storage-id', {
615 description => "Default storage for VM state volumes/files.",
616 optional => 1,
617 }),
618 runningmachine => get_standard_option('pve-qemu-machine', {
619 description => "Specifies the QEMU machine type of the running vm. This is used internally"
620 ." for snapshots.",
621 }),
622 runningcpu => {
623 description => "Specifies the QEMU '-cpu' parameter of the running vm. This is used"
624 ." internally for snapshots.",
625 optional => 1,
626 type => 'string',
627 pattern => $PVE::QemuServer::CPUConfig::qemu_cmdline_cpu_re,
628 format_description => 'QEMU -cpu parameter'
629 },
630 machine => get_standard_option('pve-qemu-machine'),
631 arch => {
632 description => "Virtual processor architecture. Defaults to the host.",
633 optional => 1,
634 type => 'string',
635 enum => [qw(x86_64 aarch64)],
636 },
637 smbios1 => {
638 description => "Specify SMBIOS type 1 fields.",
639 type => 'string', format => 'pve-qm-smbios1',
640 maxLength => 512,
641 optional => 1,
642 },
643 protection => {
644 optional => 1,
645 type => 'boolean',
646 description => "Sets the protection flag of the VM. This will disable the remove VM and"
647 ." remove disk operations.",
648 default => 0,
649 },
650 bios => {
651 optional => 1,
652 type => 'string',
653 enum => [ qw(seabios ovmf) ],
654 description => "Select BIOS implementation.",
655 default => 'seabios',
656 },
657 vmgenid => {
658 type => 'string',
659 pattern => '(?:[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}|[01])',
660 format_description => 'UUID',
661 description => "Set VM Generation ID. Use '1' to autogenerate on create or update, pass '0'"
662 ." to disable explicitly.",
663 verbose_description => "The VM generation ID (vmgenid) device exposes a 128-bit integer"
664 ." value identifier to the guest OS. This allows to notify the guest operating system"
665 ." when the virtual machine is executed with a different configuration (e.g. snapshot"
666 ." execution or creation from a template). The guest operating system notices the"
667 ." change, and is then able to react as appropriate by marking its copies of"
668 ." distributed databases as dirty, re-initializing its random number generator, etc.\n"
669 ."Note that auto-creation only works when done through API/CLI create or update methods"
670 .", but not when manually editing the config file.",
671 default => "1 (autogenerated)",
672 optional => 1,
673 },
674 hookscript => {
675 type => 'string',
676 format => 'pve-volume-id',
677 optional => 1,
678 description => "Script that will be executed during various steps in the vms lifetime.",
679 },
680 ivshmem => {
681 type => 'string',
682 format => $ivshmem_fmt,
683 description => "Inter-VM shared memory. Useful for direct communication between VMs, or to"
684 ." the host.",
685 optional => 1,
686 },
687 audio0 => {
688 type => 'string',
689 format => $audio_fmt,
690 description => "Configure a audio device, useful in combination with QXL/Spice.",
691 optional => 1
692 },
693 spice_enhancements => {
694 type => 'string',
695 format => $spice_enhancements_fmt,
696 description => "Configure additional enhancements for SPICE.",
697 optional => 1
698 },
699 tags => {
700 type => 'string', format => 'pve-tag-list',
701 description => 'Tags of the VM. This is only meta information.',
702 optional => 1,
703 },
704 rng0 => {
705 type => 'string',
706 format => $rng_fmt,
707 description => "Configure a VirtIO-based Random Number Generator.",
708 optional => 1,
709 },
710 };
711
# Property-string schema for the 'cicustom' option. Each key names one kind
# of cloud-init data (meta, network, user) and takes a volume ID pointing to
# a custom file that is passed to the VM.
my $cicustom_fmt = {
    meta => {
        type => 'string',
        optional => 1,
        # Both fragments use single quotes. The original mixed ' and ", which
        # left a literal quote, newline and dot inside the description text.
        description => 'Specify a custom file containing all meta data passed to the VM via'
            .' cloud-init. This is provider specific meaning configdrive2 and nocloud differ.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    network => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all network data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    user => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all user data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
};
738 PVE::JSONSchema::register_format('pve-qm-cicustom', $cicustom_fmt);
739
# Schema for the cloud-init related VM config options. These entries are
# merged into $confdesc further down in the file.
my $confdesc_cloudinit = {
    citype => {
        optional => 1,
        type => 'string',
        # the parenthesis after `ostype` was never closed in the original text
        description => 'Specifies the cloud-init configuration format. The default depends on the'
            .' configured operating system type (`ostype`). We use the `nocloud` format for Linux,'
            .' and `configdrive2` for windows.',
        enum => ['configdrive2', 'nocloud', 'opennebula'],
    },
    ciuser => {
        optional => 1,
        type => 'string',
        description => "cloud-init: User name to change ssh keys and password for instead of the"
            ." image's configured default user.",
    },
    cipassword => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Password to assign the user. Using this is generally not'
            .' recommended. Use ssh keys instead. Also note that older cloud-init versions do not'
            .' support hashed passwords.',
    },
    cicustom => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Specify custom files to replace the automatically generated'
            .' ones at start.',
        format => 'pve-qm-cicustom',
    },
    searchdomain => {
        optional => 1,
        type => 'string',
        # All fragments use double quotes. The original opened with " but
        # continued with ', so the text contained stray quotes, dots and newlines.
        description => "cloud-init: Sets DNS search domains for a container. Create will"
            ." automatically use the setting from the host if neither searchdomain nor nameserver"
            ." are set.",
    },
    nameserver => {
        optional => 1,
        type => 'string', format => 'address-list',
        # same quoting fix as for 'searchdomain'
        description => "cloud-init: Sets DNS server IP address for a container. Create will"
            ." automatically use the setting from the host if neither searchdomain nor nameserver"
            ." are set.",
    },
    sshkeys => {
        optional => 1,
        type => 'string',
        format => 'urlencoded',
        description => "cloud-init: Setup public SSH keys (one key per line, OpenSSH format).",
    },
};
790
791 # what about other qemu settings ?
792 #cpu => 'string',
793 #machine => 'string',
794 #fda => 'file',
795 #fdb => 'file',
796 #mtdblock => 'file',
797 #sd => 'file',
798 #pflash => 'file',
799 #snapshot => 'bool',
800 #bootp => 'file',
801 ##tftp => 'dir',
802 ##smb => 'dir',
803 #kernel => 'file',
804 #append => 'string',
805 #initrd => 'file',
806 ##soundhw => 'string',
807
# Register every option currently present in $confdesc as a standard option
# named 'pve-qm-<option>'.
for my $opt (keys %$confdesc) {
    PVE::JSONSchema::register_standard_option("pve-qm-$opt", $confdesc->{$opt});
}
811
812 my $MAX_USB_DEVICES = 5;
813 my $MAX_NETS = 32;
814 my $MAX_SERIAL_PORTS = 4;
815 my $MAX_PARALLEL_PORTS = 3;
816 my $MAX_NUMA = 8;
817
818 my $numa_fmt = {
819 cpus => {
820 type => "string",
821 pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
822 description => "CPUs accessing this NUMA node.",
823 format_description => "id[-id];...",
824 },
825 memory => {
826 type => "number",
827 description => "Amount of memory this NUMA node provides.",
828 optional => 1,
829 },
830 hostnodes => {
831 type => "string",
832 pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
833 description => "Host NUMA nodes to use.",
834 format_description => "id[-id];...",
835 optional => 1,
836 },
837 policy => {
838 type => 'string',
839 enum => [qw(preferred bind interleave)],
840 description => "NUMA allocation policy.",
841 optional => 1,
842 },
843 };
844 PVE::JSONSchema::register_format('pve-qm-numanode', $numa_fmt);
845 my $numadesc = {
846 optional => 1,
847 type => 'string', format => $numa_fmt,
848 description => "NUMA topology.",
849 };
850 PVE::JSONSchema::register_standard_option("pve-qm-numanode", $numadesc);
851
# Add the options numa0 .. numa<MAX_NUMA-1>; they all share one description.
for my $idx (0 .. $MAX_NUMA - 1) {
    $confdesc->{"numa$idx"} = $numadesc;
}
855
856 my $nic_model_list = [
857 'e1000',
858 'e1000-82540em',
859 'e1000-82544gc',
860 'e1000-82545em',
861 'e1000e',
862 'i82551',
863 'i82557b',
864 'i82559er',
865 'ne2k_isa',
866 'ne2k_pci',
867 'pcnet',
868 'rtl8139',
869 'virtio',
870 'vmxnet3',
871 ];
872 my $nic_model_list_txt = join(' ', sort @$nic_model_list);
873
874 my $net_fmt_bridge_descr = <<__EOD__;
875 Bridge to attach the network device to. The Proxmox VE standard bridge
876 is called 'vmbr0'.
877
878 If you do not specify a bridge, we create a kvm user (NATed) network
879 device, which provides DHCP and DNS services. The following addresses
880 are used:
881
882 10.0.2.2 Gateway
883 10.0.2.3 DNS Server
884 10.0.2.4 SMB Server
885
886 The DHCP server assign addresses to the guest starting from 10.0.2.15.
887 __EOD__
888
889 my $net_fmt = {
890 macaddr => get_standard_option('mac-addr', {
891 description => "MAC address. That address must be unique withing your network. This is"
892 ." automatically generated if not specified.",
893 }),
894 model => {
895 type => 'string',
896 description => "Network Card Model. The 'virtio' model provides the best performance with"
897 ." very low CPU overhead. If your guest does not support this driver, it is usually"
898 ." best to use 'e1000'.",
899 enum => $nic_model_list,
900 default_key => 1,
901 },
902 (map { $_ => { keyAlias => 'model', alias => 'macaddr' }} @$nic_model_list),
903 bridge => {
904 type => 'string',
905 description => $net_fmt_bridge_descr,
906 format_description => 'bridge',
907 pattern => '[-_.\w\d]+',
908 optional => 1,
909 },
910 queues => {
911 type => 'integer',
912 minimum => 0, maximum => 16,
913 description => 'Number of packet queues to be used on the device.',
914 optional => 1,
915 },
916 rate => {
917 type => 'number',
918 minimum => 0,
919 description => "Rate limit in mbps (megabytes per second) as floating point number.",
920 optional => 1,
921 },
922 tag => {
923 type => 'integer',
924 minimum => 1, maximum => 4094,
925 description => 'VLAN tag to apply to packets on this interface.',
926 optional => 1,
927 },
928 trunks => {
929 type => 'string',
930 pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
931 description => 'VLAN trunks to pass through this interface.',
932 format_description => 'vlanid[;vlanid...]',
933 optional => 1,
934 },
935 firewall => {
936 type => 'boolean',
937 description => 'Whether this interface should be protected by the firewall.',
938 optional => 1,
939 },
940 link_down => {
941 type => 'boolean',
942 description => 'Whether this interface should be disconnected (like pulling the plug).',
943 optional => 1,
944 },
945 mtu => {
946 type => 'integer',
947 minimum => 1, maximum => 65520,
948 description => "Force MTU, for VirtIO only. Set to '1' to use the bridge MTU",
949 optional => 1,
950 },
951 };
952
953 my $netdesc = {
954 optional => 1,
955 type => 'string', format => $net_fmt,
956 description => "Specify network devices.",
957 };
958
959 PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
960
961 my $ipconfig_fmt = {
962 ip => {
963 type => 'string',
964 format => 'pve-ipv4-config',
965 format_description => 'IPv4Format/CIDR',
966 description => 'IPv4 address in CIDR format.',
967 optional => 1,
968 default => 'dhcp',
969 },
970 gw => {
971 type => 'string',
972 format => 'ipv4',
973 format_description => 'GatewayIPv4',
974 description => 'Default gateway for IPv4 traffic.',
975 optional => 1,
976 requires => 'ip',
977 },
978 ip6 => {
979 type => 'string',
980 format => 'pve-ipv6-config',
981 format_description => 'IPv6Format/CIDR',
982 description => 'IPv6 address in CIDR format.',
983 optional => 1,
984 default => 'dhcp',
985 },
986 gw6 => {
987 type => 'string',
988 format => 'ipv6',
989 format_description => 'GatewayIPv6',
990 description => 'Default gateway for IPv6 traffic.',
991 optional => 1,
992 requires => 'ip6',
993 },
994 };
995 PVE::JSONSchema::register_format('pve-qm-ipconfig', $ipconfig_fmt);
# Schema for the ipconfig[n] options: a property string in the
# 'pve-qm-ipconfig' format.
my $ipconfigdesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-ipconfig',
    description => <<'EODESCR',
cloud-init: Specify IP addresses and gateways for the corresponding interface.

IP addresses use CIDR notation, gateways are optional but need an IP of the same type specified.

The special string 'dhcp' can be used for IP addresses to use DHCP, in which case no explicit
gateway should be provided.
For IPv6 the special string 'auto' can be used to use stateless autoconfiguration. This requires
cloud-init 19.4 or newer.

If cloud-init is enabled and neither an IPv4 nor an IPv6 address is specified, it defaults to using
dhcp on IPv4.
EODESCR
};
# Register the ipconfig description under 'pve-qm-ipconfig'. The original
# passed $netdesc here, so the standard option returned the network device
# schema instead of the ipconfig one.
PVE::JSONSchema::register_standard_option("pve-qm-ipconfig", $ipconfigdesc);
1014
# One netN option and one cloud-init ipconfigN option per possible NIC index.
for my $idx (0 .. $MAX_NETS - 1) {
    $confdesc->{"net$idx"} = $netdesc;
    $confdesc_cloudinit->{"ipconfig$idx"} = $ipconfigdesc;
}

# Every cloud-init option is also a regular VM config option.
$confdesc->{$_} = $confdesc_cloudinit->{$_} for keys %$confdesc_cloudinit;
1023
1024 PVE::JSONSchema::register_format('pve-volume-id-or-qm-path', \&verify_volume_id_or_qm_path);
# Format verifier: accepts the literals 'none' and 'cdrom', any string starting
# with '/', or anything the 'pve-volume-id' format accepts.
# Returns the (possibly normalised) value. On failure it returns nothing when
# $noerr is set and re-throws the format error otherwise.
sub verify_volume_id_or_qm_path {
    my ($volid, $noerr) = @_;

    for my $literal (qw(none cdrom)) {
        return $volid if $volid eq $literal;
    }
    return $volid if $volid =~ m|^/|;

    # neither a special literal nor a path, so it has to be a volume ID
    my $checked = eval { PVE::JSONSchema::check_format('pve-volume-id', $volid, '') };
    if (my $err = $@) {
        return if $noerr;
        die $err;
    }
    return $checked;
}
1040
# Property-string schema of the usbN options.
my $usb_fmt = {
    host => {
        default_key => 1,
        type => 'string', format => 'pve-qm-usb-device',
        format_description => 'HOSTUSBDEVICE|spice',
        description => <<EODESCR,
The Host USB device or port or the value 'spice'. HOSTUSBDEVICE syntax is:

'bus-port(.port)*' (decimal numbers) or
'vendor_id:product_id' (hexadecimal numbers) or
'spice'

You can use the 'lsusb -t' command to list existing usb devices.

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

The value 'spice' can be used to add a USB redirection device for spice.
EODESCR
    },
    usb3 => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether the given host option is a USB3 device or port.",
        default => 0,
    },
};

# Config schema entry shared by every usbN option.
my $usbdesc = {
    optional => 1,
    type => 'string', format => $usb_fmt,
    description => "Configure an USB device (n is 0 to 4).",
};
PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc);
1075
# Config schema entry shared by every serialN option: either a host device
# below /dev or the literal 'socket'.
my $serialdesc = {
    type => 'string',
    pattern => '(/dev/.+|socket)',
    optional => 1,
    description => "Create a serial device inside the VM (n is 0 to 3)",
    verbose_description => <<EODESCR,
Create a serial device inside the VM (n is 0 to 3), and pass through a
host serial device (i.e. /dev/ttyS0), or create a unix socket on the
host side (use 'qm terminal' to open a terminal connection).

NOTE: If you pass through a host serial device, it is no longer possible to migrate such machines -
use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};

# Config schema entry shared by every parallelN option.
my $paralleldesc = {
    type => 'string',
    pattern => '/dev/parport\d+|/dev/usb/lp\d+',
    optional => 1,
    description => "Map host parallel devices (n is 0 to 2).",
    verbose_description => <<EODESCR,
Map host parallel devices (n is 0 to 2).

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};

# Register the indexed options in the VM config schema.
$confdesc->{"parallel$_"} = $paralleldesc for 0 .. $MAX_PARALLEL_PORTS - 1;

$confdesc->{"serial$_"} = $serialdesc for 0 .. $MAX_SERIAL_PORTS - 1;

$confdesc->{"hostpci$_"} = $PVE::QemuServer::PCI::hostpcidesc
    for 0 .. $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES - 1;

# all drive options (as declared by the Drive module) are config options, too
{
    my $drivedesc = $PVE::QemuServer::Drive::drivedesc_hash;
    $confdesc->{$_} = $drivedesc->{$_} for keys %{$drivedesc};
}

$confdesc->{"usb$_"} = $usbdesc for 0 .. $MAX_USB_DEVICES - 1;
1127
# Property-string schema of the 'boot' option: the deprecated single-letter
# 'legacy' form (default key) and the explicit 'order' device list.
my $boot_fmt = {
    legacy => {
        type => 'string',
        optional => 1,
        default_key => 1,
        pattern => '[acdn]{1,4}',
        format_description => "[acdn]{1,4}",
        description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n)."
            . " Deprecated, use 'order=' instead.",

        # note: this is also the fallback if boot: is not given at all
        default => 'cdn',
    },
    order => {
        type => 'string',
        optional => 1,
        format => 'pve-qm-bootdev-list',
        format_description => "device[;device...]",
        description => <<EODESC,
The guest will attempt to boot from devices in the order they appear here.

Disks, optical drives and passed-through storage USB devices will be directly
booted from, NICs will load PXE, and PCIe devices will either behave like disks
(e.g. NVMe) or load an option ROM (e.g. RAID controller, hardware NIC).

Note that only devices in this list will be marked as bootable and thus loaded
by the guest firmware (BIOS/UEFI). If you require multiple disks for booting
(e.g. software-raid), you need to specify all of them here.

Overrides the deprecated 'legacy=[acdn]*' value when given.
EODESC
    },
};
PVE::JSONSchema::register_format('pve-qm-boot', $boot_fmt);
1162
1163 PVE::JSONSchema::register_format('pve-qm-bootdev', \&verify_bootdev);
# Format verifier for a single boot device name.
# Accepts every valid drive name except efidisk*/tpmstate*, plus netN, usbN and
# hostpciN as long as that key exists in the config schema.
# Returns the name, or nothing ($noerr set) / dies on an invalid name.
sub verify_bootdev {
    my ($dev, $noerr) = @_;

    my $is_state_disk = $dev =~ m/^(?:efidisk|tpmstate)/;
    if (PVE::QemuServer::Drive::is_valid_drivename($dev) && !$is_state_disk) {
        return $dev;
    }

    for my $prefix (qw(net usb hostpci)) {
        next if $dev !~ m/^$prefix\d+$/;
        return $dev if $confdesc->{$dev};
    }

    return if $noerr;
    die "invalid boot device '$dev'\n";
}
1184
# Serialise a list of boot devices into a 'boot' property string
# ("order=dev1;dev2"). An empty list yields the empty string.
sub print_bootorder {
    my ($devs) = @_;

    return "" if scalar(@$devs) == 0;

    my $order = join(';', @$devs);
    return PVE::JSONSchema::print_property_string({ order => $order }, $boot_fmt);
}
1191
# cached result of the KVM_GET_API_VERSION ioctl (0 = not queried yet)
my $kvm_api_version = 0;

# Return the KVM API version reported by /dev/kvm. The value is cached once a
# true value was obtained; returns nothing if /dev/kvm cannot be opened.
sub kvm_version {
    if (!$kvm_api_version) {
        open(my $kvm_fh, '<', '/dev/kvm') or return;

        # 0xae00 => KVM_GET_API_VERSION
        $kvm_api_version = ioctl($kvm_fh, 0xae00, 0);
        close($kvm_fh);
    }

    return $kvm_api_version;
}
1205
# per-binary caches: parsed version string and the binary's mtime it was read at
my $kvm_user_version = {};
my $kvm_mtime = {};

# Return the version string printed by "$binary --version" (defaults to the
# QEMU binary for the host architecture), or 'unknown' if it cannot be parsed.
# The result is cached per binary and refreshed when the binary's mtime changes.
sub kvm_user_version {
    my ($binary) = @_;

    $binary //= get_command_for_arch(get_host_arch()); # get the native arch by default
    my $st = stat($binary);

    if (my $cached = $kvm_user_version->{$binary}) {
        my $seen_mtime = $kvm_mtime->{$binary} // -1;
        return $cached if $seen_mtime == $st->mtime;
    }

    $kvm_user_version->{$binary} = 'unknown';
    $kvm_mtime->{$binary} = $st->mtime;

    my $parse_line = sub {
        my ($line) = @_;
        return if $line !~ m/^QEMU( PC)? emulator version (\d+\.\d+(\.\d+)?)(\.\d+)?[,\s]/;
        $kvm_user_version->{$binary} = $2;
    };

    eval { run_command([$binary, '--version'], outfunc => $parse_line); };
    warn $@ if $@;

    return $kvm_user_version->{$binary};
}
# Thin wrapper around PVE::QemuServer::Machine::extract_version() that falls
# back to the installed QEMU version when no $version is passed.
my sub extract_version {
    my ($machine_type, $version) = @_;
    $version //= kvm_user_version();
    return PVE::QemuServer::Machine::extract_version($machine_type, $version);
}
1240
# True if the character device /dev/vhost-net exists on this host.
sub kernel_has_vhost_net {
    my $vhost_dev = '/dev/vhost-net';
    return -c $vhost_dev;
}
1244
# True if $key is a known option in the VM config schema.
sub option_exists {
    my ($key) = @_;
    return defined($confdesc->{$key});
}
1249
# cached host CD-ROM device path
my $cdrom_path;

# Return the first of /dev/cdrom, /dev/cdrom1, /dev/cdrom2 that is a symlink
# and cache it for later calls. Yields a false value if none of them is.
sub get_cdrom_path {
    return $cdrom_path if $cdrom_path;

    for my $candidate ("/dev/cdrom", "/dev/cdrom1") {
        return $cdrom_path = $candidate if -l $candidate;
    }
    return $cdrom_path = "/dev/cdrom2" if -l "/dev/cdrom2";
}
1259
# Resolve the 'file' value of a CD-ROM drive to something QEMU can open:
# 'cdrom' -> host CD-ROM device, 'none' -> empty string, absolute paths are
# returned verbatim and everything else is treated as a volume ID.
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';
    return '' if $cdrom eq 'none';
    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path($storecfg, $cdrom);
}
1273
# Try to convert old style file names to volume IDs.
# 'none', 'cdrom', /dev/... paths and anything already of the form
# "storage:volume" are returned unchanged. A bare file name is mapped to
# "local:iso/<file>" for CD-ROM media and to "local:<vmid>/<file>" otherwise.
# Any other value containing a '/' cannot be converted and yields nothing.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    return $file if $file eq 'none' || $file eq 'cdrom';
    return $file if $file =~ m|^/dev/.+| || $file =~ m/^([^:]+):(.+)$/;

    return if $file =~ m|/|;

    my $is_iso = $media && $media eq 'cdrom';
    return $is_iso ? "local:iso/$file" : "local:$vmid/$file";
}
1292
# Check that the storage content type $vtype matches what the drive's $media
# setting needs ('disk' -> 'images', 'cdrom' -> 'iso').
# Returns silently when $media is unset or the types match, raises a parameter
# exception for $opt on a mismatch and dies on an unknown $media value.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my %expected_for = (
        disk => 'images',
        cdrom => 'iso',
    );
    my $etype = $expected_for{$media} // die "internal error";

    return if ($vtype eq $etype);

    raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
}
1311
# Normalise $drive in place: a plain file system path in $drive->{file} is
# replaced by the matching volume ID (raising a parameter exception for $opt if
# no storage owns the path), and 'media' is set to 'cdrom' for ISO volumes and
# for the special files 'cdrom'/'none' unless a media type is already set.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    # everything that is not special, not a /dev path, not a volume ID and not
    # purely numeric is treated as a file system path
    my $file = $drive->{file};
    my $is_fs_path = $file !~ m/^(cdrom|none)$/
        && $file !~ m|^/dev/.+|
        && $file !~ m/^([^:]+):(.+)$/
        && $file !~ m/^\d+$/;

    if ($is_fs_path) {
        # try to convert filesystem paths to volume IDs
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $drive->{file});
        if (!$vtype) {
            raise_param_exc({ $opt => "unable to associate path '$drive->{file}' to any storage"});
        }
        $drive->{media} = 'cdrom' if !$drive->{media} && $vtype eq 'iso';
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    $drive->{media} = 'cdrom' if !$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/;
}
1331
# Parse the 'hotplug' option into a hash of enabled features.
# '0' disables everything, '1' selects the schema default, otherwise $data is
# a list of network|disk|cpu|memory|usb. Dies on an unknown feature name.
sub parse_hotplug_features {
    my ($data) = @_;

    my %enabled;
    return \%enabled if $data eq '0';

    $data = $confdesc->{hotplug}->{default} if $data eq '1';

    for my $feature (PVE::Tools::split_list($data)) {
        $feature =~ m/^(network|disk|cpu|memory|usb)$/
            or die "invalid hotplug feature '$feature'\n";
        $enabled{$1} = 1;
    }

    return \%enabled;
}
1350
1351 PVE::JSONSchema::register_format('pve-hotplug-features', \&pve_verify_hotplug_features);
# Format verifier for the 'hotplug' option.
# Returns $value if parse_hotplug_features() accepts it. Otherwise returns
# nothing when $noerr is set, or dies with the parser's error message.
sub pve_verify_hotplug_features {
    my ($value, $noerr) = @_;

    # parse_hotplug_features() dies on invalid input, so it has to be wrapped
    # for $noerr to have any effect
    my $parsed = eval { parse_hotplug_features($value) };
    my $err = $@;

    return $value if !$err && $parsed;

    return if $noerr;

    die $err if $err;
    die "unable to parse hotplug option\n";
}
1361
# Issue a SCSI command with opcode 0x12 (INQUIRY, judging by the sub name) via
# the Linux SCSI generic (sg) ioctl interface on the open file handle $fh and
# decode the first 36 bytes of the reply.
#
# Returns a hash with 'vendor', 'product', 'revision', 'removable' (0/1) and
# 'type' (low 5 bits of the first reply byte). On any failure it dies, or
# returns nothing when $noerr is set.
sub scsi_inquiry {
    my($fh, $noerr) = @_;

    my $SG_IO = 0x2285;
    my $SG_GET_VERSION_NUM = 0x2282;

    # make sure the handle speaks a recent enough sg interface at all
    my $versionbuf = "\x00" x 8;
    my $ret = ioctl($fh, $SG_GET_VERSION_NUM, $versionbuf);
    if (!$ret) {
        die "scsi ioctl SG_GET_VERSION_NUM failed - $!\n" if !$noerr;
        return;
    }
    my $version = unpack("I", $versionbuf);
    if ($version < 30000) {
        die "scsi generic interface too old\n" if !$noerr;
        return;
    }

    # reply buffer, sense buffer and the 6 byte command block
    my $buf = "\x00" x 36;
    my $sensebuf = "\x00" x 8;
    my $cmd = pack("C x3 C x1", 0x12, 36);

    # see /usr/include/scsi/sg.h
    my $sg_io_hdr_t = "i i C C s I P P P I I i P C C C C S S i I I";

    my $packet = pack($sg_io_hdr_t, ord('S'), -3, length($cmd),
                      length($sensebuf), 0, length($buf), $buf,
                      $cmd, $sensebuf, 6000);

    $ret = ioctl($fh, $SG_IO, $packet);
    if (!$ret) {
        die "scsi ioctl SG_IO failed - $!\n" if !$noerr;
        return;
    }

    # fields 17 and 18 of the header are the two 'S' (unsigned short) status
    # words; any non-zero value is treated as an error
    my @res = unpack($sg_io_hdr_t, $packet);
    if ($res[17] || $res[18]) {
        die "scsi ioctl SG_IO status error - $!\n" if !$noerr;
        return;
    }

    my $res = {};
    (my $byte0, my $byte1, $res->{vendor},
     $res->{product}, $res->{revision}) = unpack("C C x6 A8 A16 A4", $buf);

    $res->{removable} = $byte1 & 128 ? 1 : 0;
    $res->{type} = $byte0 & 31;

    return $res;
}
1412
# Open $path read/write and run scsi_inquiry() on it.
# Returns the inquiry result hash, or nothing if the path cannot be opened or
# the inquiry fails.
sub path_is_scsi {
    my ($path) = @_;

    # pass the mode separately so the path is taken literally (no 2-arg open
    # interpretation such as stripping of surrounding whitespace)
    my $fh = IO::File->new($path, '+<') || return;
    my $res = scsi_inquiry($fh, 1);
    close($fh);

    return $res;
}
1422
# Build the -device argument for the USB tablet. q35 machines and aarch64
# guests attach it to the 'ehci' bus, everything else to 'uhci'.
sub print_tabletdevice_full {
    my ($conf, $arch) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # we use uhci for old VMs because tablet driver was buggy in older qemu
    my $usbbus = ($q35 || $arch eq 'aarch64') ? 'ehci' : 'uhci';

    return "usb-tablet,id=tablet,bus=$usbbus.0,port=1";
}
1438
# Build the -device argument for a USB keyboard. Only aarch64 guests get one;
# for every other architecture nothing is returned.
sub print_keyboarddevice_full {
    my ($conf, $arch, $machine) = @_;

    if ($arch eq 'aarch64') {
        return "usb-kbd,id=keyboard,bus=ehci.0,port=2";
    }

    return;
}
1446
# Drive identifier as used in device and drive ids: interface name directly
# followed by the index, e.g. "scsi0".
my sub get_drive_id {
    my ($drive) = @_;
    return $drive->{interface} . $drive->{index};
}
1451
# Build the QEMU -device argument for a disk or CD-ROM drive.
# Handles the virtio, scsi, ide and sata interfaces; 'usb' is not implemented
# and any other interface is rejected. The device is linked to the block
# backend "drive-<id>" and gets bootindex/serial appended when configured.
sub print_drivedevice_full {
    my ($storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type) = @_;

    my $iface = $drive->{interface};
    my $drive_id = get_drive_id($drive);
    my $device = '';

    if ($iface eq 'virtio') {
        my $pciaddr = print_pci_addr("$drive_id", $bridges, $arch, $machine_type);
        $device = "virtio-blk-pci,drive=drive-$drive_id,id=${drive_id}${pciaddr}";
        if ($drive->{iothread}) {
            $device .= ",iothread=iothread-$drive_id";
        }
    } elsif ($iface eq 'scsi') {
        my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
        my $unit = $drive->{index} % $maxdev;

        my $devicetype;
        if (drive_is_cdrom($drive)) {
            $devicetype = 'cd';
        } else {
            $devicetype = 'hd';

            my $path;
            if ($drive->{file} =~ m|^/|) {
                # host path: ask the device itself what it is
                $path = $drive->{file};
                my $info = path_is_scsi($path);
                if ($info) {
                    if ($info->{type} == 0 && $drive->{scsiblock}) {
                        $devicetype = 'block';
                    } elsif ($info->{type} == 1) { # tape
                        $devicetype = 'generic';
                    }
                }
            } else {
                $path = PVE::Storage::path($storecfg, $drive->{file});
            }

            # for compatibility only, we prefer scsi-hd (#2408, #2355, #2380)
            my $version = extract_version($machine_type, kvm_user_version());
            my $is_iscsi = $path =~ m/^iscsi\:\/\//;
            if ($is_iscsi && !min_version($version, 4, 1)) {
                $devicetype = 'generic';
            }
        }

        # lsi* and pvscsi controllers (and the unset default) address disks by
        # scsi-id, all others by lun on channel 0 / scsi-id 0
        my $scsihw = $conf->{scsihw};
        my $addr_by_scsi_id = !$scsihw || $scsihw =~ m/^lsi/ || $scsihw eq 'pvscsi';

        $device = "scsi-$devicetype,bus=$controller_prefix$controller.0";
        if ($addr_by_scsi_id) {
            $device .= ",scsi-id=$unit";
        } else {
            $device .= ",channel=0,scsi-id=0" . ",lun=$drive->{index}";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        my $can_rotate = $devicetype eq 'block' || $devicetype eq 'hd';
        $device .= ",rotation_rate=1" if $drive->{ssd} && $can_rotate;
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};
    } elsif ($iface eq 'ide' || $iface eq 'sata') {
        my $is_sata = $iface eq 'sata';
        my $maxdev = $is_sata ? $PVE::QemuServer::Drive::MAX_SATA_DISKS : 2;
        my $controller = int($drive->{index} / $maxdev);
        my $unit = $drive->{index} % $maxdev;

        my $is_cd = $drive->{media} && $drive->{media} eq 'cdrom';
        my $devicetype = $is_cd ? "cd" : "hd";

        $device = "ide-$devicetype";
        $device .= $is_sata ? ",bus=ahci$controller.$unit" : ",bus=ide.$controller,unit=$unit";
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($devicetype eq 'hd') {
            my $model = $drive->{model};
            if ($model) {
                $model = URI::Escape::uri_unescape($model);
                $device .= ",model=$model";
            }
            $device .= ",rotation_rate=1" if $drive->{ssd};
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};
    } elsif ($iface eq 'usb') {
        die "implement me";
        # -device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0
    } else {
        die "unsupported interface type";
    }

    if ($drive->{bootindex}) {
        $device .= ",bootindex=$drive->{bootindex}";
    }

    my $serial = $drive->{serial};
    if ($serial) {
        $serial = URI::Escape::uri_unescape($serial);
        $device .= ",serial=$serial";
    }

    return $device;
}
1547
# Read the first 'InitiatorName=' value from /etc/iscsi/initiatorname.iscsi.
# Returns nothing if the file cannot be opened and undef if no such line exists.
sub get_initiator_name {
    my $fh = IO::File->new('/etc/iscsi/initiatorname.iscsi') || return;

    my $initiator;
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\s*InitiatorName\s*=\s*([\.\-:\w]+)/) {
            $initiator = $1;
            last;
        }
    }
    $fh->close();

    return $initiator;
}
1561
# Build the QEMU -drive argument for $drive.
# $pbs_name, when set, is the node name of a PBS snapshot block device that
# backs the drive through an alloc-track layer. $io_uring tells whether
# io_uring may be picked as the default AIO backend.
sub print_drive_commandline_full {
    my ($storecfg, $vmid, $drive, $pbs_name, $io_uring) = @_;

    my $volid = $drive->{file};
    my $format = $drive->{format};
    my $drive_id = get_drive_id($drive);

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
    my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;

    my $path;
    if (drive_is_cdrom($drive)) {
        $path = get_iso_path($storecfg, $vmid, $volid);
        die "$drive_id: cannot back cdrom drive with PBS snapshot\n" if $pbs_name;
    } elsif ($storeid) {
        $path = PVE::Storage::path($storecfg, $volid);
        $format //= qemu_img_format($scfg, $volname);
    } else {
        $path = $volid;
        $format //= "raw";
    }

    my $is_rbd = $path =~ m/^rbd:/;

    # options that are passed through to QEMU verbatim when set
    my @qemu_drive_options = qw(heads secs cyls trans media cache rerror werror aio discard);
    my $opts = join('', map { ",$_=$drive->{$_}" } grep { defined($drive->{$_}) } @qemu_drive_options);

    # snapshot only accepts on|off
    if (defined($drive->{snapshot})) {
        $opts .= ",snapshot=" . ($drive->{snapshot} ? 'on' : 'off');
    }

    # throttling: total, read and write limits; the mbps values are converted
    # to bytes per second
    for my $type (['', '-total'], [_rd => '-read'], [_wr => '-write']) {
        my ($dir, $qmpname) = @$type;
        my @limits = (
            ["mbps$dir", "bps$qmpname", 1],
            ["mbps${dir}_max", "bps$qmpname-max", 1],
            ["bps${dir}_max_length", "bps$qmpname-max-length", 0],
            ["iops${dir}", "iops$qmpname", 0],
            ["iops${dir}_max", "iops$qmpname-max", 0],
            ["iops${dir}_max_length", "iops$qmpname-max-length", 0],
        );
        for my $limit (@limits) {
            my ($key, $qemu_opt, $is_megabytes) = @$limit;
            my $v = $drive->{$key} or next;
            $v = int($v*1024*1024) if $is_megabytes;
            $opts .= ",throttling.$qemu_opt=$v";
        }
    }

    if ($pbs_name) {
        $format = "rbd" if $is_rbd;
        die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n"
            if !$format;
        $opts .= ",format=alloc-track,file.driver=$format";
    } elsif ($format) {
        $opts .= ",format=$format";
    }

    my $cache_direct = 0;

    if (my $cache = $drive->{cache}) {
        $cache_direct = $cache =~ /^(?:off|none|directsync)$/;
    } else {
        # default to cache=none, except for CD-ROMs and btrfs storages that
        # have not set 'nocow'
        my $btrfs_cow = $scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow};
        if (!drive_is_cdrom($drive) && !$btrfs_cow) {
            $opts .= ",cache=none";
            $cache_direct = 1;
        }
    }

    # io_uring with cache mode writeback or writethrough on krbd will hang...
    my $rbd_no_io_uring = $scfg && $scfg->{type} eq 'rbd' && $scfg->{krbd} && !$cache_direct;

    # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
    # sometimes, just plain disable...
    my $lvm_no_io_uring = $scfg && $scfg->{type} eq 'lvm';

    if (!$drive->{aio}) {
        # io_uring supports all cache modes, aio native works only with O_DIRECT
        my $aio = ($io_uring && !$rbd_no_io_uring && !$lvm_no_io_uring) ? 'io_uring'
            : $cache_direct ? 'native'
            : 'threads';
        $opts .= ",aio=$aio";
    }

    if (!drive_is_cdrom($drive)) {
        my $detectzeroes;
        if (defined($drive->{detect_zeroes}) && !$drive->{detect_zeroes}) {
            $detectzeroes = 'off';
        } elsif ($drive->{discard}) {
            $detectzeroes = $drive->{discard} eq 'on' ? 'unmap' : 'on';
        } else {
            # This used to be our default with discard not being specified:
            $detectzeroes = 'on';
        }

        # note: 'detect-zeroes' works per blockdev and we want it to persist
        # after the alloc-track is removed, so put it on 'file' directly
        my $dz_param = $pbs_name ? "file.detect-zeroes" : "detect-zeroes";
        $opts .= ",$dz_param=$detectzeroes" if $detectzeroes;
    }

    if ($pbs_name) {
        $opts .= ",backing=$pbs_name" . ",auto-remove=on";
    }

    my $file_param = "file";
    if ($pbs_name) {
        # non-rbd drivers require the underlying file to be a seperate block
        # node, so add a second .file indirection
        $file_param .= ".file" if !$is_rbd;
        $file_param .= ".filename";
    }
    my $pathinfo = $path ? "$file_param=$path," : '';

    return "${pathinfo}if=none,id=drive-$drive->{interface}$drive->{index}$opts";
}
1695
# Build the -blockdev argument for a read-only PBS snapshot node named
# $pbs_name from the repository/snapshot/archive (and optional keyfile)
# entries of $pbs_conf.
sub print_pbs_blockdev {
    my ($pbs_conf, $pbs_name) = @_;

    my @parts = (
        "driver=pbs",
        "node-name=$pbs_name",
        "read-only=on",
        "repository=$pbs_conf->{repository}",
        "snapshot=$pbs_conf->{snapshot}",
        "archive=$pbs_conf->{archive}",
    );
    push @parts, "keyfile=$pbs_conf->{keyfile}" if $pbs_conf->{keyfile};

    return join(',', @parts);
}
1705
# Build the QEMU -device argument for the guest NIC $netid.
# Adds multiqueue vectors and host_mtu for VirtIO NICs, the boot index when
# set, and the legacy PXE ROM file name when $use_old_bios_files is set.
# Dies if the requested MTU is below 576 or above the bridge MTU.
sub print_netdevice_full {
    my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type) = @_;

    my $is_virtio = $net->{model} eq 'virtio';
    my $device = $is_virtio ? 'virtio-net-pci' : $net->{model};

    my $pciaddr = print_pci_addr("$netid", $bridges, $arch, $machine_type);
    my $tmpstr = "$device,mac=$net->{macaddr},netdev=$netid$pciaddr,id=$netid";

    if ($net->{queues} && $net->{queues} > 1 && $is_virtio) {
        # Consider we have N queues, the number of vectors needed is 2 * N + 2, i.e., one per in
        # and out of each queue plus one config interrupt and control vector queue
        my $vectors = $net->{queues} * 2 + 2;
        $tmpstr .= ",vectors=$vectors,mq=on";
    }
    if ($net->{bootindex}) {
        $tmpstr .= ",bootindex=$net->{bootindex}";
    }

    if (my $mtu = $net->{mtu}) {
        if ($is_virtio && $net->{bridge}) {
            my $bridge_mtu = PVE::Network::read_bridge_mtu($net->{bridge});
            if ($mtu == 1) {
                # the special value 1 means: follow the bridge
                $mtu = $bridge_mtu;
            } else {
                die "netdev $netid: MTU '$mtu' is smaller than the IP minimum MTU '576'\n"
                    if $mtu < 576;
                die "netdev $netid: MTU '$mtu' is bigger than the bridge MTU '$bridge_mtu'\n"
                    if $mtu > $bridge_mtu;
            }
            $tmpstr .= ",host_mtu=$mtu";
        } else {
            warn "WARN: netdev $netid: ignoring MTU '$mtu', not using VirtIO or no bridge configured.\n";
        }
    }

    if ($use_old_bios_files) {
        my %romfile_for = (
            'virtio-net-pci' => 'pxe-virtio.rom',
            'e1000' => 'pxe-e1000.rom',
            'e1000e' => 'pxe-e1000e.rom',
            'ne2k' => 'pxe-ne2k_pci.rom',
            'pcnet' => 'pxe-pcnet.rom',
            'rtl8139' => 'pxe-rtl8139.rom',
        );
        my $romfile = $romfile_for{$device};
        $tmpstr .= ",romfile=$romfile" if $romfile;
    }

    return $tmpstr;
}
1760
# Build the QEMU -netdev argument (host side backend) for NIC $netid.
# Bridged NICs get a tap device named "tap<vmid>i<index>" wired to the
# pve-bridge scripts (the hotplug variant if $hotplug is set); NICs without a
# bridge use user mode networking. Dies on an id that is not "netN", on an
# index >= $MAX_NETS and on an interface name of 16 or more characters.
sub print_netdev_full {
    my ($vmid, $conf, $arch, $net, $netid, $hotplug) = @_;

    # only netN ids are valid; reject anything else instead of silently
    # building an interface name without an index
    my ($i) = $netid =~ m/^net(\d+)$/
        or die "got strange net id '$netid'\n";
    $i = int($i);

    die "got strange net id '$i'\n" if $i >= ${MAX_NETS};

    my $ifname = "tap${vmid}i$i";

    # kvm uses TUNSETIFF ioctl, and that limits ifname length
    die "interface name '$ifname' is too long (max 15 character)\n"
        if length($ifname) >= 16;

    my $vhostparam = '';
    if (is_native($arch)) {
        $vhostparam = ',vhost=on' if kernel_has_vhost_net() && $net->{model} eq 'virtio';
    }

    my $vmname = $conf->{name} || "vm$vmid";

    my $netdev = "";
    my $script = $hotplug ? "pve-bridge-hotplug" : "pve-bridge";

    if ($net->{bridge}) {
        $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/$script"
            .",downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam";
    } else {
        $netdev = "type=user,id=$netid,hostname=$vmname";
    }

    $netdev .= ",queues=$net->{queues}" if ($net->{queues} && $net->{model} eq 'virtio');

    return $netdev;
}
1798
# maps the configured vga type to the QEMU device name
my $vga_map = {
    'cirrus' => 'cirrus-vga',
    'std' => 'VGA',
    'vmware' => 'vmware-svga',
    'virtio' => 'virtio-vga',
};

# Build the QEMU -device argument for a display adapter.
# $vga is the parsed vga option, $id the index of an additional display (undef
# for the first one) and $qxlnum is true when QXL displays are requested.
# Dies if the configured vga type maps to no device.
sub print_vga_device {
    my ($conf, $vga, $arch, $machine_version, $machine, $id, $qxlnum, $bridges) = @_;

    my $type = $vga_map->{$vga->{type}};
    if ($arch eq 'aarch64' && defined($type) && $type eq 'virtio-vga') {
        $type = 'virtio-gpu';
    }
    my $vgamem_mb = $vga->{memory};

    my $max_outputs = '';
    if ($qxlnum) {
        $type = $id ? 'qxl' : 'qxl-vga';

        # the anchors have to apply to both alternatives (lNN and other)
        if (!$conf->{ostype} || $conf->{ostype} =~ m/^(?:l\d\d|other)$/) {
            # set max outputs so linux can have up to 4 qxl displays with one device
            if (min_version($machine_version, 4, 1)) {
                $max_outputs = ",max_outputs=4";
            }
        }
    }

    die "no devicetype for $vga->{type}\n" if !$type;

    my $memory = "";
    if ($vgamem_mb) {
        if ($vga->{type} eq 'virtio') {
            my $bytes = PVE::Tools::convert_size($vgamem_mb, "mb" => "b");
            $memory = ",max_hostmem=$bytes";
        } elsif ($qxlnum) {
            # from https://www.spice-space.org/multiple-monitors.html
            $memory = ",vgamem_mb=$vga->{memory}";
            my $ram = $vgamem_mb * 4;
            my $vram = $vgamem_mb * 2;
            $memory .= ",ram_size_mb=$ram,vram_size_mb=$vram";
        } else {
            $memory = ",vgamem_mb=$vga->{memory}";
        }
    } elsif ($qxlnum && $id) {
        $memory = ",ram_size=67108864,vram_size=33554432";
    }

    # EDID is switched off for the std VGA on Windows guests unless OVMF is used
    my $edidoff = "";
    if ($type eq 'VGA' && windows_version($conf->{ostype})) {
        $edidoff=",edid=off" if (!defined($conf->{bios}) || $conf->{bios} ne 'ovmf');
    }

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $vgaid = "vga" . ($id // '');
    my $pciaddr;
    if ($q35 && $vgaid eq 'vga') {
        # the first display uses pcie.0 bus on q35 machines
        $pciaddr = print_pcie_addr($vgaid, $bridges, $arch, $machine);
    } else {
        $pciaddr = print_pci_addr($vgaid, $bridges, $arch, $machine);
    }

    return "$type,id=${vgaid}${memory}${max_outputs}${pciaddr}${edidoff}";
}
1864
# Parse a ';' separated list of numbers and ranges ("0-3;5") into an array of
# [ first, last ] pairs; 'last' is undef for a single number.
# Dies on malformed parts and on ranges whose end is below their start.
sub parse_number_sets {
    my ($set) = @_;

    my @ranges;
    for my $part (split(/;/, $set)) {
        my ($first, $last) = $part =~ /^\s*(\d+)(?:-(\d+))?\s*$/
            or die "invalid range: $part\n";
        die "invalid range: $part ($last < $first)\n" if defined($last) && $last < $first;
        push @ranges, [ $first, $last ];
    }

    return \@ranges;
}
1878
# Parse a numaN property string; the 'cpus' and 'hostnodes' values are expanded
# into range lists with parse_number_sets() when present.
sub parse_numa {
    my ($data) = @_;

    my $res = parse_property_string($numa_fmt, $data);
    for my $key (qw(cpus hostnodes)) {
        next if !defined($res->{$key});
        $res->{$key} = parse_number_sets($res->{$key});
    }
    return $res;
}
1887
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
#
# Parse a netN property string. A parse error is only warned about and yields
# nothing. If no MAC address is given, a random one is generated using the
# datacenter wide 'mac_prefix'.
sub parse_net {
    my ($data) = @_;

    my $res = eval { parse_property_string($net_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    $res->{macaddr} //= do {
        my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
        PVE::Tools::random_ether_addr($dc->{mac_prefix});
    };

    return $res;
}
1903
# ipconfigX ip=cidr,gw=ip,ip6=cidr,gw6=ip
#
# Parse an ipconfigN property string. Parse errors and inconsistent
# address/gateway combinations are only warned about and yield nothing.
# If neither an IPv4 nor an IPv6 address is set, DHCP is returned for both.
sub parse_ipconfig {
    my ($data) = @_;

    my $res = eval { parse_property_string($ipconfig_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    my ($ip, $gw) = ($res->{ip}, $res->{gw});
    my ($ip6, $gw6) = ($res->{ip6}, $res->{gw6});

    if ($gw && !$ip) {
        warn 'gateway specified without specifying an IP address';
        return;
    }
    if ($gw6 && !$ip6) {
        warn 'IPv6 gateway specified without specifying an IPv6 address';
        return;
    }
    if ($gw && $ip eq 'dhcp') {
        warn 'gateway specified together with DHCP';
        return;
    }
    if ($gw6 && $ip6 !~ /^$IPV6RE/) {
        # gw6 + auto/dhcp
        warn "IPv6 gateway specified together with $res->{ip6} address";
        return;
    }

    return { ip => 'dhcp', ip6 => 'dhcp' } if !$ip && !$ip6;

    return $res;
}
1938
# Serialise a parsed net hash back into a netN property string.
sub print_net {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $net_fmt);
}
1944
# Re-serialise every netN entry of $settings in place after passing it through
# parse_net(), which fills in a random MAC address where none is configured.
# Entries that fail to parse are left untouched.
sub add_random_macs {
    my ($settings) = @_;

    for my $opt (grep { m/^net(\d+)$/ } keys %$settings) {
        my $net = parse_net($settings->{$opt}) or next;
        $settings->{$opt} = print_net($net);
    }
}
1955
# Returns 1 if the storage layer reports $vmid as the owner of volume $volid.
# Absolute paths are never owned; storage errors are ignored and count as
# "not owned". Returns nothing in all other cases.
sub vm_is_volid_owner {
    my ($storecfg, $vmid, $volid) = @_;

    return if $volid =~ m|^/|;

    my ($path, $owner);
    eval { ($path, $owner) = PVE::Storage::path($storecfg, $volid); };

    return 1 if $owner && ($owner == $vmid);
    return;
}
1969
# Handle a drive that is no longer referenced by the config: cloud-init drives
# are freed right away (errors only warned about), other non-CD-ROM volumes are
# recorded as 'unused' in $conf if this VM owns them.
sub vmconfig_register_unused_drive {
    my ($storecfg, $vmid, $conf, $drive) = @_;

    if (drive_is_cloudinit($drive)) {
        eval { PVE::Storage::vdisk_free($storecfg, $drive->{file}) };
        warn $@ if $@;
    } elsif (!drive_is_cdrom($drive)) {
        my $volid = $drive->{file};
        PVE::QemuConfig->add_unused_volume($conf, $volid, $vmid)
            if vm_is_volid_owner($storecfg, $vmid, $volid);
    }
}
1983
# smbios: [manufacturer=str][,product=str][,version=str][,serial=str][,uuid=uuid][,sku=str][,family=str][,base64=bool]
#
# All free-text fields share the same schema and only differ in their
# description, so they are generated from a small table.
my $smbios1_fmt = {
    uuid => {
        type => 'string',
        pattern => '[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}',
        format_description => 'UUID',
        description => "Set SMBIOS1 UUID.",
        optional => 1,
    },
    (map {
        my ($field, $what) = @$_;
        ($field => {
            type => 'string',
            pattern => '[A-Za-z0-9+\/]+={0,2}',
            format_description => 'Base64 encoded string',
            description => "Set SMBIOS1 $what.",
            optional => 1,
        });
    } (
        [version => 'version'],
        [serial => 'serial number'],
        [manufacturer => 'manufacturer'],
        [product => 'product ID'],
        [sku => 'SKU string'],
        [family => 'family string'],
    )),
    base64 => {
        type => 'boolean',
        description => 'Flag to indicate that the SMBIOS values are base64 encoded',
        optional => 1,
    },
};
2041
# Parse an 'smbios1' property string against $smbios1_fmt.
# A parse failure is only reported via warn; the result is then undef.
sub parse_smbios1 {
    my $data = shift;

    my $parsed = eval { parse_property_string($smbios1_fmt, $data) };
    if (my $err = $@) {
        warn $err;
    }

    return $parsed;
}
2049
# Serialize an smbios1 hash (as produced by parse_smbios1) back into its
# property-string form, using the $smbios1_fmt schema.
sub print_smbios1 {
    my $smbios1 = shift;

    return PVE::JSONSchema::print_property_string($smbios1, $smbios1_fmt);
}
2054
2055 PVE::JSONSchema::register_format('pve-qm-smbios1', $smbios1_fmt);
2056
# Parse a 'watchdog' property string against $watchdog_fmt.
# Returns nothing for an empty/false value; a parse failure is only reported
# via warn and yields undef.
sub parse_watchdog {
    my $value = shift;

    return unless $value;

    my $parsed = eval { parse_property_string($watchdog_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $parsed;
}
2066
# Parse the 'agent' option of a VM config against $agent_fmt.
# Always returns a hash reference: it is empty when no agent is configured,
# when parsing failed (the error is only warned about), or when the agent is
# not enabled.
sub parse_guest_agent {
    my ($conf) = @_;

    my $agent_string = $conf->{agent};
    return {} unless defined($agent_string);

    my $agent = eval { parse_property_string($agent_fmt, $agent_string) };
    warn $@ if $@;

    # if the agent is disabled ignore the other potentially set properties
    return $agent->{enabled} ? $agent : {};
}
2079
# Look up a single property ($key) of the parsed guest agent configuration.
# Yields undef when no agent is configured at all; otherwise whatever
# parse_guest_agent() has stored under $key (possibly undef).
sub get_qga_key {
    my ($conf, $key) = @_;

    if (defined($conf->{agent})) {
        my $agent = parse_guest_agent($conf);
        return $agent->{$key};
    }

    return undef;
}
2087
# Parse a 'vga' property string against $vga_fmt.
# An empty/false value yields an empty hash; a parse failure is only reported
# via warn and yields undef.
sub parse_vga {
    my $value = shift;

    return {} unless $value;

    my $parsed = eval { parse_property_string($vga_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $parsed;
}
2096
# Parse an 'rng' property string against $rng_fmt.
# Returns nothing for an empty/false value; a parse failure is only reported
# via warn and yields undef.
sub parse_rng {
    my $value = shift;

    return unless $value;

    my $parsed = eval { parse_property_string($rng_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $parsed;
}
2106
2107 PVE::JSONSchema::register_format('pve-qm-usb-device', \&verify_usb_device);
# Format verifier for 'pve-qm-usb-device'.
# Returns $value unchanged if parse_usb_device() accepts it. Otherwise it
# returns nothing when $noerr is set and dies when it is not.
sub verify_usb_device {
    my ($value, $noerr) = @_;

    if (!parse_usb_device($value)) {
        return if $noerr;
        die "unable to parse usb device\n";
    }

    return $value;
}
2117
# add JSON properties for create and set function
#
# Copies every option description from $confdesc into $prop, except for the
# options listed below, and returns $prop.
sub json_config_properties {
    my ($prop) = @_;

    my %skipped = map { $_ => 1 } qw(parent snaptime vmstate runningmachine runningcpu);

    for my $opt (grep { !$skipped{$_} } keys %$confdesc) {
        $prop->{$opt} = $confdesc->{$opt};
    }

    return $prop;
}
2137
# return copy of $confdesc_cloudinit to generate documentation
# (a deep copy, so callers can modify the result freely)
sub cloudinit_config_properties {
    my $copy = dclone($confdesc_cloudinit);
    return $copy;
}
2143
# Validate $value for config option $key according to the type declared in
# $confdesc and return the normalized value:
#   boolean - 1 or 0 (accepts 1/on/yes/true and 0/off/no/false)
#   integer - the value converted with int() (digits only)
#   number  - the value as given (digits with an optional fractional part)
#   string  - checked via PVE::JSONSchema::check_format if the option declares
#             a format, otherwise one pair of enclosing double quotes is removed
# Dies for unknown options, undefined values, values containing a line feed or
# carriage return, and values that fail the type check.
sub check_type {
    my ($key, $value) = @_;

    my $desc = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$desc;

    my $type = $desc->{type};

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if $value eq '1' || $value =~ m/^(on|yes|true)$/i;
        return 0 if $value eq '0' || $value =~ m/^(off|no|false)$/i;
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    if ($type eq 'string') {
        my $fmt = $desc->{format};
        if ($fmt) {
            PVE::JSONSchema::check_format($fmt, $value);
        } else {
            $value =~ s/^\"(.*)\"$/$1/;
        }
        return $value;
    }

    die "internal error"
}
2180
# Destroy VM $vmid: free the volumes it owns and then remove its configuration.
#
# $skiplock           - skip the config lock check
# $replacement_conf   - if defined, written as the new config instead of
#                       destroying the config file
# $purge_unreferenced - also free every image listed for $vmid on the storages,
#                       even if the config does not reference it
#
# Dies if the VM is a template whose base volumes are still in use.
# Failures to free individual volumes are only warned about.
sub destroy_vm {
    my ($storecfg, $vmid, $skiplock, $replacement_conf, $purge_unreferenced) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    PVE::QemuConfig->check_lock($conf) if !$skiplock;

    if ($conf->{template}) {
        # check if any base image is still used by a linked clone
        my $assert_base_unused = sub {
            my ($ds, $drive) = @_;
            return if drive_is_cdrom($drive);

            my $volid = $drive->{file};
            return if !$volid || $volid =~ m|^/|;

            if (PVE::Storage::volume_is_base_and_used($storecfg, $volid)) {
                die "base volume '$volid' is still in use by linked cloned\n";
            }
        };
        PVE::QemuConfig->foreach_volume_full($conf, { include_unused => 1 }, $assert_base_unused);
    }

    # volumes already freed, so a volume referenced several times is handled once
    my %freed;
    my $remove_owned_drive = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|; # absolute paths are never freed
        return if $freed{$volid};

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        $freed{$volid} = 1;
        eval { PVE::Storage::vdisk_free($storecfg, $volid) };
        warn "Could not remove disk '$volid', check manually: $@" if $@;
    };

    # only remove disks owned by this VM (referenced in the config)
    my $include_opts = {
        include_unused => 1,
        extra_keys => ['vmstate'],
    };
    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $remove_owned_drive);

    # saved VM state of snapshots
    for my $snap (values %{$conf->{snapshots}}) {
        my $vmstate = $snap->{vmstate};
        next if !defined($vmstate);

        my $drive = PVE::QemuConfig->parse_volume('vmstate', $vmstate, 1);
        $remove_owned_drive->('vmstate', $drive) if defined($drive);
    }

    PVE::QemuConfig->foreach_volume_full($conf->{pending}, $include_opts, $remove_owned_drive);

    if ($purge_unreferenced) { # also remove unreferenced disk
        my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid, undef, 'images');
        my $free_volume = sub {
            my ($volid, $sid, $volname, $d) = @_;
            eval { PVE::Storage::vdisk_free($storecfg, $volid) };
            warn $@ if $@;
        };
        PVE::Storage::foreach_volid($vmdisks, $free_volume);
    }

    if (defined($replacement_conf)) {
        PVE::QemuConfig->write_config($vmid, $replacement_conf);
    } else {
        PVE::QemuConfig->destroy_config($vmid);
    }
}
2251
# Parse the raw content of a VM configuration file.
#
# $filename must end in '/qemu-server/<vmid>.conf' (the VMID is only used in
# warnings), $raw is the file content. Returns nothing if $raw is undefined,
# otherwise a hash with
#   digest    - SHA1 hex digest of $raw
#   pending   - options from the [PENDING] section
#   snapshots - one hash per '[name]' section
# plus the options of the main section as top-level keys. '#' comment lines
# and 'description:' lines of a section are collected into its 'description'.
#
# Lines that cannot be parsed or fail check_type() are only warned about and
# skipped.
sub parse_vm_config {
    my ($filename, $raw) = @_;

    return if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
        pending => {},
    };

    $filename =~ m|/qemu-server/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    my $conf = $res;
    my $descr;
    my $section = '';

    # store the description collected so far in the section that is being left
    my $flush_description = sub {
        if (defined($descr)) {
            $descr =~ s/\s+$//;
            $conf->{description} = $descr;
        }
        $descr = undef;
    };

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        if ($line =~ m/^\[PENDING\]\s*$/i) {
            $section = 'pending';
            $flush_description->();
            $conf = $res->{$section} = {};
            next;

        } elsif ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            $section = $1;
            $flush_description->();
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)\s*$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # anchored like every other key: without the '^' any line merely
            # ending in "snapstate: prepare|delete" was taken as a snapstate
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            # 'args' is stored verbatim, without a type check
            my $key = $1;
            my $value = $2;
            $conf->{$key} = $value;
        } elsif ($line =~ m/^delete:\s*(.*\S)\s*$/) {
            my $value = $1;
            if ($section eq 'pending') {
                $conf->{delete} = $value; # we parse this later
            } else {
                warn "vm $vmid - propertry 'delete' is only allowed in [PENDING]\n";
            }
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(.+?)\s*$/) {
            my $key = $1;
            my $value = $2;
            eval { $value = check_type($key, $value); };
            if ($@) {
                warn "vm $vmid - unable to parse value of '$key' - $@";
            } else {
                # legacy alias: 'cdrom' is stored as 'ide2'
                $key = 'ide2' if $key eq 'cdrom';
                my $fmt = $confdesc->{$key}->{format};
                if ($fmt && $fmt =~ /^pve-qm-(?:ide|scsi|virtio|sata)$/) {
                    # re-print drives with their file mapped through filename_to_volume_id()
                    my $v = parse_drive($key, $value);
                    if (my $volid = filename_to_volume_id($vmid, $v->{file}, $v->{media})) {
                        $v->{file} = $volid;
                        $value = print_drive($v);
                    } else {
                        warn "vm $vmid - unable to parse value of '$key'\n";
                        next;
                    }
                }

                $conf->{$key} = $value;
            }
        } else {
            warn "vm $vmid - unable to parse config: $line\n";
        }
    }

    $flush_description->();
    delete $res->{snapstate}; # just to be sure

    return $res;
}
2354
# Serialize a VM configuration hash into the text of a config file and return it.
#
# Note that $conf is modified in place: 'snapstate' is removed, the legacy
# 'cdrom' and 'smp' options are converted, every value is replaced by what
# check_type() returns for it, and 'unusedX' entries are dropped when their
# volume is referenced by a drive again.
#
# Dies on a cdrom/ide2 conflict, on 'delete' outside of the pending section, on
# values rejected by check_type() and on a snapshot named 'pending'.
sub write_vm_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    if ($conf->{cdrom}) {
        die "option ide2 conflicts with cdrom\n" if $conf->{ide2};
        $conf->{ide2} = delete $conf->{cdrom};
    }

    # we do not use 'smp' any longer
    if ($conf->{sockets}) {
        delete $conf->{smp};
    } elsif (my $smp = $conf->{smp}) {
        delete @$conf{qw(cores smp)};
        $conf->{sockets} = $smp;
    }

    # keys that structure the config and are not options themselves
    my %structural = map { $_ => 1 } qw(digest description snapshots snapstate pending);

    # volumes referenced by drives of the current or pending config
    my %used_volids;

    my $cleanup_config = sub {
        my ($cref, $pending, $snapname) = @_;

        for my $key (keys %$cref) {
            next if $structural{$key};

            if ($key eq 'delete') {
                die "propertry 'delete' is only allowed in [PENDING]\n"
                    if !$pending;
                # fixme: check syntax?
                next;
            }

            my $value = $cref->{$key};
            eval { $value = check_type($key, $value); };
            die "unable to parse value of '$key' - $@" if $@;

            $cref->{$key} = $value;

            next if $snapname || !is_valid_drivename($key);

            my $drive = parse_drive($key, $value);
            $used_volids{$drive->{file}} = 1 if $drive && $drive->{file};
        }
    };

    $cleanup_config->($conf);

    $cleanup_config->($conf->{pending}, 1);

    for my $snapname (keys %{$conf->{snapshots}}) {
        die "internal error: snapshot name '$snapname' is forbidden" if lc($snapname) eq 'pending';
        $cleanup_config->($conf->{snapshots}->{$snapname}, undef, $snapname);
    }

    # remove 'unusedX' settings if we re-add a volume
    for my $key (grep { m/^unused/ } keys %$conf) {
        delete $conf->{$key} if $used_volids{$conf->{$key}};
    }

    my $generate_raw_config = sub {
        my ($section, $is_pending) = @_;

        my $text = '';

        # add description as comment to top of file
        my $descr = $section->{description};
        if ($descr) {
            for my $descr_line (split(/\n/, $descr)) {
                $text .= '#' . PVE::Tools::encode_text($descr_line) . "\n";
            }
        } elsif (defined($descr) && $is_pending) {
            # a defined but false description is only written for [PENDING]
            $text .= "#\n";
        }

        for my $key (sort keys %$section) {
            next if $key =~ /^(digest|description|pending|snapshots)$/;
            $text .= "$key: $section->{$key}\n";
        }

        return $text;
    };

    my $raw = $generate_raw_config->($conf);

    if (scalar(keys %{$conf->{pending}})) {
        $raw .= "\n[PENDING]\n" . $generate_raw_config->($conf->{pending}, 1);
    }

    for my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n" . $generate_raw_config->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
2456
# Collect the default value of every option in $confdesc that declares one.
# Returns a hash reference mapping option name to default.
sub load_defaults {
    # we use static defaults from our JSON schema configuration
    my %defaults =
        map { $_ => $confdesc->{$_}->{default} }
        grep { defined($confdesc->{$_}->{default}) }
        keys %$confdesc;

    return \%defaults;
}
2470
# List the guests of type 'qemu' that the cluster VM list assigns to the node
# named by nodename(). Returns a hash reference { $vmid => { exists => 1 } },
# which is empty if no VM list is available.
sub config_list {
    my $vmlist = PVE::Cluster::get_vmlist();
    my $res = {};

    my $ids = $vmlist ? $vmlist->{ids} : undef;
    return $res if !$ids;

    my $local_node = nodename();

    for my $vmid (keys %$ids) {
        my $entry = $ids->{$vmid};
        next unless $entry->{node} && $entry->{node} eq $local_node;
        next unless $entry->{type} && $entry->{type} eq 'qemu';
        $res->{$vmid}->{exists} = 1;
    }

    return $res;
}
2486
# test if VM uses local resources (to prevent migration)
#
# Returns an array reference with the config keys that refer to local
# resources. Dies if there are any, unless $noerr is set.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    # 'hostusb' and 'hostpci' without an index are the old syntax
    my @local_keys = grep { $conf->{$_} } qw(hostusb hostpci ivshmem);

    for my $key (keys %$conf) {
        my $value = $conf->{$key};

        # usb entries starting with 'spice' are not counted
        next if $key =~ m/^usb/ && $value =~ m/^spice(?![^,])/;
        # sockets are safe: they will be recreated on the target side post-migrate
        next if $key =~ m/^serial/ && $value eq 'socket';

        push @local_keys, $key if $key =~ m/^(usb|hostpci|serial|parallel)\d+$/;
    }

    die "VM uses local resources\n" if scalar(@local_keys) && !$noerr;

    return \@local_keys;
}
2509
# check if used storages are available on all nodes (use by migrate)
#
# For every volume of $conf that lives on a storage, this checks that the
# storage is enabled (once without a node argument and once for $node) and that
# the storage config returned by the first check allows the volume's content
# type. Any failed check dies.
sub check_storage_availability {
    my ($storecfg, $conf, $node) = @_;

    my $check_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid; # not a volume ID

        # check if storage is available on both nodes
        my $scfg = PVE::Storage::storage_check_enabled($storecfg, $sid);
        PVE::Storage::storage_check_enabled($storecfg, $sid, $node);

        my ($vtype) = PVE::Storage::parse_volname($storecfg, $volid);

        if (!$scfg->{content}->{$vtype}) {
            die "$volid: content type '$vtype' is not available on storage '$sid'\n";
        }
    };

    PVE::QemuConfig->foreach_volume($conf, $check_volume);
}
2533
# list nodes where all VM images are available (used by has_feature API)
#
# Starts with all cluster nodes and, per volume, removes the nodes its storage
# is not usable on: all nodes for a disabled storage, the nodes missing from the
# storage's 'nodes' restriction, or all but the local node for a storage that
# is not shared. Returns a hash reference { $node => 1 }.
sub shared_nodes {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => 1 } @$nodelist };
    my $nodename = nodename();

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        my @excluded;
        if ($scfg->{disable}) {
            $nodehash = {};
        } elsif (my $avail = $scfg->{nodes}) {
            @excluded = grep { !$avail->{$_} } keys %$nodehash;
        } elsif (!$scfg->{shared}) {
            @excluded = grep { $_ ne $nodename } keys %$nodehash;
        }
        delete @$nodehash{@excluded};

        return;
    });

    return $nodehash
}
2567
# For each cluster node, determine which storages used by the volumes of $conf
# are not usable there: all nodes for a disabled storage, otherwise the nodes
# missing from the storage's 'nodes' restriction.
# Returns a hash reference { $node => { unavailable_storages => [ sorted storage IDs ] } };
# nodes without unavailable storages map to an empty hash.
sub check_local_storage_availability {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => {} } @$nodelist };

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        my $avail = $scfg->{nodes};

        my @missing_on;
        if ($scfg->{disable}) {
            @missing_on = keys %$nodehash;
        } elsif ($avail) {
            @missing_on = grep { !$avail->{$_} } keys %$nodehash;
        }

        for my $node (@missing_on) {
            $nodehash->{$node}->{unavailable_storages}->{$storeid} = 1;
        }

        return;
    });

    # turn the per-node sets into sorted lists
    for my $info (values %$nodehash) {
        my $unavail = $info->{unavailable_storages};
        $info->{unavailable_storages} = [ sort keys %$unavail ] if $unavail;
    }

    return $nodehash
}
2606
# Compat only, use assert_config_exists_on_node and vm_running_locally where possible
#
# Unless $nocheck is set, first asserts that the config of $vmid exists on
# $node. Returns the result of vm_running_locally() for $vmid.
sub check_running {
    my ($vmid, $nocheck, $node) = @_;

    unless ($nocheck) {
        PVE::QemuConfig::assert_config_exists_on_node($vmid, $node);
    }

    return PVE::QemuServer::Helpers::vm_running_locally($vmid);
}
2614
# Return the hash from config_list(), with a 'pid' entry added for every listed
# VM that has a '<vmid>.pid' file in the run directory and that check_running()
# reports as running.
sub vzlist {
    my $vms = config_list();

    my $rundir = IO::Dir->new($PVE::QemuServer::Helpers::var_run_tmpdir);
    return $vms if !$rundir;

    while (defined(my $entry = $rundir->read)) {
        my ($vmid) = $entry =~ m/^(\d+)\.pid$/
            or next;
        next if !defined($vms->{$vmid});

        my $pid = check_running($vmid);
        $vms->{$vmid}->{pid} = $pid if $pid;
    }

    return $vms;
}
2632
# Schema describing properties of a vmstatus() entry.
# Everything except 'vmid' and 'status' is optional.
our $vmstatus_return_properties = {
    vmid => get_standard_option('pve-vmid'),
    status => {
        type => 'string',
        description => 'Qemu process status.',
        enum => [ 'stopped', 'running' ],
    },
    maxmem => {
        type => 'integer',
        description => 'Maximum memory in bytes.',
        renderer => 'bytes',
        optional => 1,
    },
    maxdisk => {
        type => 'integer',
        description => 'Root disk size in bytes.',
        renderer => 'bytes',
        optional => 1,
    },
    name => {
        type => 'string',
        description => 'VM name.',
        optional => 1,
    },
    qmpstatus => {
        type => 'string',
        description => 'Qemu QMP agent status.',
        optional => 1,
    },
    pid => {
        type => 'integer',
        description => 'PID of running qemu process.',
        optional => 1,
    },
    uptime => {
        type => 'integer',
        description => 'Uptime.',
        renderer => 'duration',
        optional => 1,
    },
    cpus => {
        type => 'number',
        description => 'Maximum usable CPUs.',
        optional => 1,
    },
    lock => {
        type => 'string',
        description => 'The current config lock, if any.',
        optional => 1,
    },
    tags => {
        type => 'string',
        description => 'The current configured tags, if any',
        optional => 1,
    },
    'running-machine' => {
        type => 'string',
        description => 'The currently running machine type (if running).',
        optional => 1,
    },
    'running-qemu' => {
        type => 'string',
        description => 'The currently running QEMU version (if running).',
        optional => 1,
    },
};
2699
2700 my $last_proc_pid_stat;
2701
# get VM status information
# This must be fast and should not block ($full == false)
# We only query KVM using QMP if $full == true (this can be slow)
#
# Returns a hash reference { $vmid => { ...status fields... } } for all VMs
# listed by vzlist(), or only for $opt_vmid if given.
sub vmstatus {
    my ($opt_vmid, $full) = @_;

    my $res = {};

    my $storecfg = PVE::Storage::config();

    my $list = vzlist();
    my $defaults = load_defaults();

    my ($uptime) = PVE::ProcFSTools::read_proc_uptime(1);

    my $cpucount = $cpuinfo->{cpus} || 1;
    my $mib = 1024 * 1024;

    # the VMs this call reports on
    my @selected = grep { !$opt_vmid || $_ eq $opt_vmid } keys %$list;

    # static information from the configuration, counters start at zero
    for my $vmid (@selected) {
        my $conf = PVE::QemuConfig->load_config($vmid);
        my $pid = $list->{$vmid}->{pid};

        my $d = {
            vmid => int($vmid),
            # fixme: better status?
            status => $pid ? 'running' : 'stopped',
            disk => 0, # no info available
            uptime => 0,
            cpu => 0,
            mem => 0,
            netout => 0,
            netin => 0,
            diskread => 0,
            diskwrite => 0,
        };
        $d->{pid} = int($pid) if $pid;

        my $size = PVE::QemuServer::Drive::bootdisk_size($storecfg, $conf);
        $d->{maxdisk} = $size // 0;

        # sockets * cores, capped at the CPU count; an explicit 'vcpus' wins
        my $cpus = ($conf->{sockets} || $defaults->{sockets})
            * ($conf->{cores} || $defaults->{cores});
        $cpus = $cpucount if $cpus > $cpucount;
        $cpus = $conf->{vcpus} if $conf->{vcpus};
        $d->{cpus} = $cpus;

        $d->{name} = $conf->{name} || "VM $vmid";
        $d->{maxmem} = ($conf->{memory} || $defaults->{memory}) * $mib;

        if ($conf->{balloon}) {
            $d->{balloon_min} = $conf->{balloon} * $mib;
            $d->{shares} = $conf->{shares} // $defaults->{shares};
        }

        $d->{template} = 1 if PVE::QemuConfig->is_template($conf);

        $d->{serial} = 1 if conf_has_serial($conf);
        $d->{lock} = $conf->{lock} if $conf->{lock};
        $d->{tags} = $conf->{tags} if defined($conf->{tags});

        $res->{$vmid} = $d;
    }

    # network counters of the tap<vmid>i* devices; what the tap device receives
    # is counted as the guest's outgoing traffic and vice versa
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    for my $dev (keys %$netdev) {
        next if $dev !~ m/^tap([1-9]\d*)i/;
        my $vmid = $1;
        my $d = $res->{$vmid};
        next if !$d;

        my $counters = $netdev->{$dev};

        $d->{netout} += $counters->{receive};
        $d->{netin} += $counters->{transmit};

        if ($full) {
            $d->{nics}->{$dev}->{netout} = int($counters->{receive});
            $d->{nics}->{$dev}->{netin} = int($counters->{transmit});
        }
    }

    my $ctime = gettimeofday;

    # uptime, memory and CPU usage from /proc for running VMs
    for my $vmid (keys %$list) {
        my $d = $res->{$vmid};
        my $pid = $d->{pid};
        next if !$pid;

        my $pstat = PVE::ProcFSTools::read_proc_pid_stat($pid);
        next if !$pstat; # not running

        my $used = $pstat->{utime} + $pstat->{stime};

        $d->{uptime} = int(($uptime - $pstat->{starttime})/$cpuinfo->{user_hz});

        if ($pstat->{vsize}) {
            $d->{mem} = int(($pstat->{rss}/$pstat->{vsize})*$d->{maxmem});
        }

        my $old = $last_proc_pid_stat->{$pid};
        if (!$old) {
            # first sample for this PID, no usage can be computed yet
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($ctime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        if ($dtime <= 1000) {
            # too little time has passed, report the previous value again
            $d->{cpu} = $old->{cpu};
            next;
        }

        my $dutime = $used - $old->{used};

        $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
        $last_proc_pid_stat->{$pid} = {
            time => $ctime,
            used => $used,
            cpu => $d->{cpu},
        };
    }

    return $res if !$full;

    my $qmpclient = PVE::QMPClient->new();

    my $ballooncb = sub {
        my ($vmid, $resp) = @_;

        my $info = $resp->{'return'};
        return if !$info->{max_mem};

        my $d = $res->{$vmid};

        # use memory assigned to VM
        $d->{maxmem} = $info->{max_mem};
        $d->{balloon} = $info->{actual};

        if (defined($info->{total_mem}) && defined($info->{free_mem})) {
            $d->{mem} = $info->{total_mem} - $info->{free_mem};
            $d->{freemem} = $info->{free_mem};
        }

        $d->{ballooninfo} = $info;
    };

    my $blockstatscb = sub {
        my ($vmid, $resp) = @_;

        my $data = $resp->{'return'} || [];
        my ($read_total, $write_total) = (0, 0);

        for my $blockstat (@$data) {
            my $stats = $blockstat->{stats};

            $read_total = $read_total + $stats->{rd_bytes};
            $write_total = $write_total + $stats->{wr_bytes};

            $blockstat->{device} =~ s/drive-//;
            $res->{$vmid}->{blockstat}->{$blockstat->{device}} = $stats;
        }

        $res->{$vmid}->{diskread} = $read_total;
        $res->{$vmid}->{diskwrite} = $write_total;
    };

    my $machinecb = sub {
        my ($vmid, $resp) = @_;

        my $data = $resp->{'return'} || [];

        $res->{$vmid}->{'running-machine'} =
            PVE::QemuServer::Machine::current_from_query_machines($data);
    };

    my $versioncb = sub {
        my ($vmid, $resp) = @_;

        my $data = $resp->{'return'} // {};
        my $v = $data->{qemu};

        $res->{$vmid}->{'running-qemu'} =
            $v ? $v->{major} . "." . $v->{minor} . "." . $v->{micro} : 'unknown';
    };

    my $statuscb = sub {
        my ($vmid, $resp) = @_;

        $qmpclient->queue_cmd($vmid, $blockstatscb, 'query-blockstats');
        $qmpclient->queue_cmd($vmid, $machinecb, 'query-machines');
        $qmpclient->queue_cmd($vmid, $versioncb, 'query-version');
        # this fails if the balloon driver is not loaded, so this must be
        # the last command (following commands are aborted if this fails).
        $qmpclient->queue_cmd($vmid, $ballooncb, 'query-balloon');

        my $status = $resp->{'return'}->{status};
        if (!defined($status)) {
            warn "unable to get VM status\n";
            return;
        }

        $res->{$vmid}->{qmpstatus} = $status;
    };

    for my $vmid (@selected) {
        next if !$res->{$vmid}->{pid}; # not running
        $qmpclient->queue_cmd($vmid, $statuscb, 'query-status');
    }

    $qmpclient->queue_execute(undef, 2);

    for my $vmid (@selected) {
        next if !$res->{$vmid}->{pid}; #not running

        # we can't use the $qmpclient since it might have already aborted on
        # 'query-balloon', but this might also fail for older versions...
        my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };
        $res->{$vmid}->{'proxmox-support'} = $qemu_support // {};
    }

    # without a QMP status, fall back to the process status
    for my $vmid (@selected) {
        my $d = $res->{$vmid};
        $d->{qmpstatus} = $d->{status} if !$d->{qmpstatus};
    }

    return $res;
}
2940
# Returns 1 if any of the serial0 .. serial<$MAX_SERIAL_PORTS - 1> options is
# set to a true value in $conf, 0 otherwise.
sub conf_has_serial {
    my ($conf) = @_;

    # $MAX_SERIAL_PORTS is expected to be an integer count
    for my $i (0 .. $MAX_SERIAL_PORTS - 1) {
        return 1 if $conf->{"serial$i"};
    }

    return 0;
}
2952
# Describe the audio device 'audio$id' ($id defaults to 0) of $conf.
# Returns nothing if that option is not set, otherwise a hash reference with
# the device model ('dev'), its ID ('dev_id'), the backend driver ('backend',
# 'spice' unless configured) and the backend ID ('backend_id').
sub conf_has_audio {
    my ($conf, $id) = @_;

    $id //= 0;

    my $audio = $conf->{"audio$id"};
    return if !defined($audio);

    my $props = parse_property_string($audio_fmt, $audio);
    my $driver = $props->{driver} // 'spice';

    my $info = {
        dev => $props->{device},
        dev_id => "audiodev$id",
        backend => $driver,
        backend_id => "$driver-backend${id}",
    };

    return $info;
}
2970
# Build the QEMU command line arguments for an audio device as described by
# conf_has_audio(). $audiopciaddr is appended verbatim to the device string.
# The 'audiodev=' property is only added for machine versions >= 4.2.
# Returns an array reference; dies for unknown device models.
sub audio_devs {
    my ($audio, $audiopciaddr, $machine_version) = @_;

    my $id = $audio->{dev_id};
    my $audiodev = min_version($machine_version, 4, 2)
        ? ",audiodev=$audio->{backend_id}"
        : "";

    my @args;
    if ($audio->{dev} eq 'AC97') {
        push @args, '-device', "AC97,id=${id}${audiopciaddr}$audiodev";
    } elsif ($audio->{dev} =~ /intel\-hda$/) {
        # the HDA controller itself plus two codecs on its bus
        push @args,
            '-device', "$audio->{dev},id=${id}${audiopciaddr}",
            '-device', "hda-micro,id=${id}-codec0,bus=${id}.0,cad=0$audiodev",
            '-device', "hda-duplex,id=${id}-codec1,bus=${id}.0,cad=1$audiodev";
    } else {
        die "unkown audio device '$audio->{dev}', implement me!";
    }

    push @args, '-audiodev', "$audio->{backend},id=$audio->{backend_id}";

    return \@args;
}
2996
# Paths of the swtpm control socket ('socket') and pid file ('pid') of VM $vmid.
sub get_tpm_paths {
    my ($vmid) = @_;

    my $base = "/var/run/qemu-server/$vmid.swtpm";

    return {
        socket => $base,
        pid => "$base.pid",
    };
}
3004
# Append the QEMU arguments for an emulated TPM (chardev on the swtpm socket,
# tpmdev and tpm-tis device) to @$devices.
# Does nothing if $conf has no 'tpmstate0'.
sub add_tpm_device {
    my ($vmid, $devices, $conf) = @_;

    return if !$conf->{tpmstate0};

    my $socket = get_tpm_paths($vmid)->{socket};

    push @$devices,
        "-chardev", "socket,id=tpmchar,path=$socket",
        "-tpmdev", "emulator,id=tpmdev,chardev=tpmchar",
        "-device", "tpm-tis,tpmdev=tpmdev";
}
3016
# Start the swtpm daemon for VM $vmid.
#
# $tpmdrive is the 'tpmstate0' drive string; nothing is done without it. The
# state location is the mapped volume if the drive's file is a volume ID,
# otherwise the file itself. Unless $migration is set, swtpm_setup is run first
# (with --not-overwrite). Then swtpm is started as a daemon and its pid file
# is awaited.
#
# Returns the PID read from the pid file, or undef if it contains no number.
# Dies if the pid file does not show up in time.
sub start_swtpm {
    my ($storecfg, $vmid, $tpmdrive, $migration) = @_;

    return if !$tpmdrive;

    my $state;
    my $tpm = parse_drive("tpmstate0", $tpmdrive);
    my ($storeid, $volname) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
    if ($storeid) {
        $state = PVE::Storage::map_volume($storecfg, $tpm->{file});
    } else {
        $state = $tpm->{file};
    }

    my $paths = get_tpm_paths($vmid);

    # during migration, we will get state from remote
    #
    if (!$migration) {
        # run swtpm_setup to create a new TPM state if it doesn't exist yet
        my $setup_cmd = [
            "swtpm_setup",
            "--tpmstate",
            "file://$state",
            "--createek",
            "--create-ek-cert",
            "--create-platform-cert",
            "--lock-nvram",
            "--config",
            "/etc/swtpm_setup.conf", # do not use XDG configs
            "--runas",
            "0", # force creation as root, error if not possible
            "--not-overwrite", # ignore existing state, do not modify
        ];

        push @$setup_cmd, "--tpm2" if $tpm->{version} eq 'v2.0';
        # TPM 2.0 supports ECC crypto, use if possible
        push @$setup_cmd, "--ecc" if $tpm->{version} eq 'v2.0';

        # use the line passed to the callback; $1 would only be whatever the
        # last regex match in the caller happened to capture
        run_command($setup_cmd, outfunc => sub {
            my ($line) = @_;
            print "swtpm_setup: $line\n";
        });
    }

    my $emulator_cmd = [
        "swtpm",
        "socket",
        "--tpmstate",
        "backend-uri=file://$state,mode=0600",
        "--ctrl",
        "type=unixio,path=$paths->{socket},mode=0600",
        "--pid",
        "file=$paths->{pid}",
        "--terminate", # terminate on QEMU disconnect
        "--daemon",
    ];
    push @$emulator_cmd, "--tpm2" if $tpm->{version} eq 'v2.0';
    run_command($emulator_cmd, outfunc => sub {
        my ($line) = @_;
        print $line;
    });

    my $tries = 100; # swtpm may take a bit to start before daemonizing, wait up to 5s for pid
    while (! -e $paths->{pid}) {
        die "failed to start swtpm: pid file '$paths->{pid}' wasn't created.\n" if --$tries == 0;
        usleep(50_000);
    }

    # return untainted PID of swtpm daemon so it can be killed on error; take
    # the capture from this match only, so a pid file without a number yields
    # undef instead of a stale $1 from an earlier match
    my ($pid) = file_read_firstline($paths->{pid}) =~ m/(\d+)/;
    return $pid;
}
3086
# Tell whether a 'vga' option selects a qxl display.
# Returns 0 if the type is not qxl; otherwise the digit of 'qxl2'..'qxl4', or 1
# for plain 'qxl'.
sub vga_conf_has_spice {
    my ($vga) = @_;

    my $vgaconf = parse_vga($vga);
    my $vgatype = $vgaconf->{type};

    if ($vgatype && $vgatype =~ m/^qxl([234])?$/) {
        return $1 || 1;
    }

    return 0;
}
3096
# True if $arch equals the architecture reported by get_host_arch().
sub is_native($) {
    my ($arch) = @_;

    my $host_arch = get_host_arch();
    return $host_arch eq $arch;
}
3101
# Architecture of a VM: the 'arch' option if defined, else the host architecture.
sub get_vm_arch {
    my ($conf) = @_;

    my $arch = $conf->{arch};
    return defined($arch) ? $arch : get_host_arch();
}
3106
# QEMU machine type used per architecture when a VM configures none
# (looked up in get_vm_machine)
my $default_machines = {
    'aarch64' => 'virt',
    'x86_64' => 'pc',
};
3111
# Return the leading "<major>.<minor>" part of $kvmversion, which defaults to
# the result of kvm_user_version(). Returns undef if the version string does
# not start with such a prefix.
sub get_installed_machine_version {
    my ($kvmversion) = @_;

    $kvmversion = kvm_user_version() if !defined($kvmversion);

    # take the capture from this match only: after a failed match $1 would
    # still hold the capture of some earlier, unrelated match
    my ($major_minor) = $kvmversion =~ m/^(\d+\.\d+)/;
    return $major_minor;
}
3118
# Turn an unversioned machine type ('pc' or none, 'q35', 'virt') into a
# version-pinned one. The pinned version is $base_version if it is defined and
# can_run_pve_machine_version() accepts it for $kvmversion, otherwise the
# installed machine version. Other machine types are returned unchanged, with a
# warning.
sub windows_get_pinned_machine_version {
    my ($machine, $base_version, $kvmversion) = @_;

    my $can_use_base = defined($base_version)
        && PVE::QemuServer::Machine::can_run_pve_machine_version($base_version, $kvmversion);
    my $pin_version = $can_use_base
        ? $base_version
        : get_installed_machine_version($kvmversion);

    my %versioned_prefix = (
        pc => 'pc-i440fx',
        q35 => 'pc-q35',
        virt => 'virt',
    );

    # no machine configured means 'pc'
    my $prefix = $versioned_prefix{ $machine || 'pc' };
    return "$prefix-$pin_version" if defined($prefix);

    warn "unknown machine type '$machine', not touching that!\n";
    return $machine;
}
3140
# Determine the QEMU machine type of a VM.
#
# $forcemachine takes precedence over the 'machine' option. If the result is
# unset or an unversioned alias (pc, q35, virt), Windows guests get pinned to a
# versioned machine type and other guests fall back to the default machine of
# $arch (x86_64 if undefined). With $add_pve_version, a '+pve<N>' suffix is
# appended where it is missing.
sub get_vm_machine {
    my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_;

    my $machine = $forcemachine || $conf->{machine};

    my $is_unversioned = !$machine || $machine =~ m/^(?:pc|q35|virt)$/;
    if ($is_unversioned) {
        $kvmversion //= kvm_user_version();

        # we must pin Windows VMs without a specific version to 5.1, as 5.2 fixed a bug in ACPI
        # layout which confuses windows quite a bit and may result in various regressions..
        # see: https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg08484.html
        $machine = windows_get_pinned_machine_version($machine, '5.1', $kvmversion)
            if windows_version($conf->{ostype});

        $arch //= 'x86_64';
        $machine ||= $default_machines->{$arch};

        if ($add_pve_version) {
            my $pvever = PVE::QemuServer::Machine::get_pve_version($kvmversion);
            $machine .= "+pve$pvever";
        }
    }

    if ($add_pve_version && $machine !~ m/\+pve\d+?(?:\.pxe)?$/) {
        # for version-pinned machines that do not include a pve-version (e.g.
        # pc-q35-4.1), we assume 0 to keep them stable in case we bump
        #
        # a trailing '.pxe' has to stay at the very end
        my ($without_pxe) = $machine =~ m/^(.*?)\.pxe$/;
        $machine = defined($without_pxe) ? "$without_pxe+pve0.pxe" : "$machine+pve0";
    }

    return $machine;
}
3175
# Returns the entries of the OVMF image list matching the given parameters;
# the caller in this file unpacks them as (code image, vars template).
#
# $arch    - architecture key into $OVMF
# $efidisk - parsed efidisk drive (may be undef); 'efitype' and
#            'pre-enrolled-keys' are read
# $smm     - if true, the '4m' images are used for 4m EFI disks, otherwise
#            the '4m-no-smm' ones
#
# Dies if there are no images for the architecture or for the selected image
# type of that architecture.
sub get_ovmf_files($$$) {
    my ($arch, $efidisk, $smm) = @_;

    my $types = $OVMF->{$arch}
        or die "no OVMF images known for architecture '$arch'\n";

    my $type = 'default';
    if (defined($efidisk->{efitype}) && $efidisk->{efitype} eq '4m') {
        $type = $smm ? "4m" : "4m-no-smm";
        $type .= '-ms' if $efidisk->{'pre-enrolled-keys'};
    }

    # Not every architecture has to provide every image type; fail with a
    # readable message instead of dereferencing undef.
    my $files = $types->{$type}
        or die "no OVMF images of type '$type' known for architecture '$arch'\n";

    return $files->@*;
}
3190
# QEMU system emulator binary per architecture.
my $Arch2Qemu = {
    x86_64 => '/usr/bin/qemu-system-x86_64',
    aarch64 => '/usr/bin/qemu-system-aarch64',
};

# Returns the binary used to run a guest of architecture $arch:
# '/usr/bin/kvm' if is_native($arch) is true, otherwise the matching entry of
# $Arch2Qemu. Dies if there is no such entry.
sub get_command_for_arch($) {
    my ($arch) = @_;

    if (is_native($arch)) {
        return '/usr/bin/kvm';
    }

    my $binary = $Arch2Qemu->{$arch};
    die "don't know how to emulate architecture '$arch'\n" if !$binary;

    return $binary;
}
3203
# Queries QEMU for the CPU flags supported on this host, per accelerator.
#
# To turn the results of query_supported_cpu_flags and
# query_understood_cpu_flags into flags for a QEMU command line (-cpu
# element), array_intersect the result of query_supported_ with
# query_understood_ first. This is necessary because:
#
# a) query_understood_ returns flags the host cannot use and
# b) query_supported_ (rather the QMP call) doesn't actually return CPU
#    flags, but CPU settings - with most of them being flags. Those settings
#    (and some flags, curiously) cannot be specified as a "-cpu" argument.
#
# query_supported_ needs to start up to 2 temporary VMs and is therefore
# rather expensive. If you need the value returned from this, you can get it
# much cheaper from pmxcfs using PVE::Cluster::get_node_kv('cpuflags-$accel')
# with $accel being 'kvm' or 'tcg'.
#
# pvestatd calls this function on startup and whenever the QEMU/KVM version
# changes, automatically populating pmxcfs.
#
# Returns: { kvm => [ flagX, flagY, ... ], tcg => [ flag1, flag2, ... ] }
# since kvm and tcg machines support different flags
#
sub query_supported_cpu_flags {
    my ($arch) = @_;

    $arch //= get_host_arch();

    # FIXME: Once this is merged, the code below should work for ARM as well:
    # https://lists.nongnu.org/archive/html/qemu-devel/2019-06/msg04947.html
    if ($arch eq "aarch64") {
        die "QEMU/KVM cannot detect CPU flags on ARM (aarch64)\n";
    }

    my $default_machine = $default_machines->{$arch};
    my $kvm_supported = defined(kvm_version());
    my $qemu_cmd = get_command_for_arch($arch);
    my $fakevmid = -1;
    my $pidfile = PVE::QemuServer::Helpers::pidfile_name($fakevmid);

    # Start a temporary (frozen) VM with vmid -1 to allow sending a QMP command
    my $run_and_query = sub {
        my ($use_kvm) = @_;

        my @qemu_args = (
            '-machine', $default_machine,
            '-display', 'none',
            '-chardev', "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server=on,wait=off",
            '-mon', 'chardev=qmp,mode=control',
            '-pidfile', $pidfile,
            '-S', '-daemonize',
        );
        push @qemu_args, '-accel', 'tcg' if !$use_kvm;

        my $exit_code = run_command([ $qemu_cmd, @qemu_args ], noerr => 1, quiet => 0);
        die "QEMU flag querying VM exited with code " . $exit_code if $exit_code;

        my %seen;
        eval {
            my $expansion = mon_cmd(
                $fakevmid,
                'query-cpu-model-expansion',
                type => 'full',
                model => { name => 'host' }
            );

            my $props = $expansion->{model}->{props};
            for my $name (keys %$props) {
                next if $props->{$name} ne '1';
                # QEMU returns some flags multiple times, with '_', '.' or '-'
                # (e.g. lahf_lm and lahf-lm; sse4.2, sse4-2 and sse4_2; ...).
                # We only keep those with underscores, to match /proc/cpuinfo
                (my $flag = $name) =~ s/[.-]/_/g;
                $seen{$flag} = 1;
            }
        };
        my $err = $@;

        # force stop with 10 sec timeout and 'nocheck', always stop, even if QMP failed
        vm_stop(undef, $fakevmid, 1, 1, 10, 0, 1);

        die $err if $err;

        return [ sort keys %seen ];
    };

    my $flags = {};

    # We need to query QEMU twice, since KVM and TCG have different supported flags
    PVE::QemuConfig->lock_config($fakevmid, sub {
        $flags->{tcg} = eval { $run_and_query->(0) };
        warn "warning: failed querying supported tcg flags: $@\n" if $@;

        if ($kvm_supported) {
            $flags->{kvm} = eval { $run_and_query->(1) };
            warn "warning: failed querying supported kvm flags: $@\n" if $@;
        }
    });

    return $flags;
}
3305
# Understood CPU flags are written to a file at 'pve-qemu' compile time
my $understood_cpu_flag_dir = "/usr/share/kvm";

# Reads the whitespace-separated flag list for the host architecture from
# $understood_cpu_flag_dir and returns it as an array reference. Dies if the
# file for the architecture does not exist.
sub query_understood_cpu_flags {
    my $arch = get_host_arch();
    my $filepath = "$understood_cpu_flag_dir/recognized-CPUID-flags-$arch";

    if (!-e $filepath) {
        die "Cannot query understood QEMU CPU flags for architecture: $arch (file not found)\n";
    }

    my $raw = file_get_contents($filepath);

    # splitting on ' ' ignores leading/trailing whitespace and treats any run
    # of whitespace as a single separator
    my @flags = split(' ', $raw);

    return \@flags;
}
3321
# Returns the configured 'cpuunits' of $conf. If it is not set, the default
# depends on PVE::CGroup::cgroup_mode(): 100 for mode 2, 1024 otherwise.
my sub get_cpuunits {
    my ($conf) = @_;

    my $configured = $conf->{cpuunits};
    return $configured if defined($configured);

    return PVE::CGroup::cgroup_mode() == 2 ? 100 : 1024;
}
# Builds the QEMU command line for a VM from its configuration.
#
# $storecfg     - storage configuration, used to resolve volume IDs to paths
# $vmid         - ID of the VM
# $conf         - VM config hash
# $defaults     - default values ('tablet', 'tdf' and 'scsihw' are read here)
# $forcemachine - machine type overriding the configured one, if set
# $forcecpu     - if set, passed verbatim as '-cpu' argument
# $pbs_backing  - optional hash, drive name => config for a PBS blockdev
#
# Returns ($cmd, $vollist, $spice_port) in list context and only $cmd (array
# reference with the command and its arguments) otherwise. $vollist holds the
# storage volumes referenced by the command line, $spice_port is only set if
# the VM uses SPICE.
#
# Dies if the installed QEMU or qemu-server is too old for the machine type,
# if KVM is requested but not available, or if a configured device cannot be
# set up (e.g. missing UEFI image, missing serial/parallel device, too many
# vCPUs).
sub config_to_command {
    my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu,
        $pbs_backing) = @_;

    my $cmd = [];
    my ($globalFlags, $machineFlags, $rtcFlags) = ([], [], []);
    my $devices = [];
    my $bridges = {};
    my $ostype = $conf->{ostype};
    my $winversion = windows_version($ostype);
    my $kvm = $conf->{kvm};
    my $nodename = nodename();

    my $arch = get_vm_arch($conf);
    my $kvm_binary = get_command_for_arch($arch);
    my $kvmver = kvm_user_version($kvm_binary);

    if (!$kvmver || $kvmver !~ m/^(\d+)\.(\d+)/ || $1 < 3) {
        $kvmver //= "undefined";
        die "Detected old QEMU binary ('$kvmver', at least 3.0 is required)\n";
    }

    my $add_pve_version = min_version($kvmver, 4, 1);

    my $machine_type = get_vm_machine($conf, $forcemachine, $arch, $add_pve_version);
    my $machine_version = extract_version($machine_type, $kvmver);
    $kvm //= 1 if is_native($arch);

    # Take the captures from the match result itself: reading $1/$2 after a
    # failed match would silently reuse the captures of the $kvmver check above.
    my ($machine_major, $machine_minor) = ($machine_version // '') =~ m/(\d+)\.(\d+)/
        or die "unable to parse version of machine type '$machine_type'\n";

    if ($kvmver =~ m/^\d+\.\d+\.(\d+)/ && $1 >= 90) {
        warn "warning: Installed QEMU version ($kvmver) is a release candidate, ignoring version checks\n";
    } elsif (!min_version($kvmver, $machine_major, $machine_minor)) {
        die "Installed QEMU version '$kvmver' is too old to run machine type '$machine_type',"
            ." please upgrade node '$nodename'\n";
    } elsif (!PVE::QemuServer::Machine::can_run_pve_machine_version($machine_version, $kvmver)) {
        my $max_pve_version = PVE::QemuServer::Machine::get_pve_version($machine_version);
        die "Installed qemu-server (max feature level for $machine_major.$machine_minor is"
            ." pve$max_pve_version) is too old to run machine type '$machine_type', please upgrade"
            ." node '$nodename'\n";
    }

    # if a specific +pve version is required for a feature, use $version_guard
    # instead of min_version to allow machines to be run with the minimum
    # required version
    my $required_pve_version = 0;
    my $version_guard = sub {
        my ($major, $minor, $pve) = @_;
        return 0 if !min_version($machine_version, $major, $minor, $pve);
        my $max_pve = PVE::QemuServer::Machine::get_pve_version("$major.$minor");
        return 1 if min_version($machine_version, $major, $minor, $max_pve+1);
        $required_pve_version = $pve if $pve && $pve > $required_pve_version;
        return 1;
    };

    if ($kvm && !defined kvm_version()) {
        die "KVM virtualisation configured, but not available. Either disable in VM configuration"
            ." or enable in BIOS.\n";
    }

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
    my $use_old_bios_files = undef;
    ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);

    my $cpuunits = get_cpuunits($conf);

    push @$cmd, $kvm_binary;

    push @$cmd, '-id', $vmid;

    my $vmname = $conf->{name} || "vm$vmid";

    push @$cmd, '-name', $vmname;

    push @$cmd, '-no-shutdown';

    my $use_virtio = 0;

    my $qmpsocket = PVE::QemuServer::Helpers::qmp_socket($vmid);
    push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server=on,wait=off";
    push @$cmd, '-mon', "chardev=qmp,mode=control";

    if (min_version($machine_version, 2, 12)) {
        push @$cmd, '-chardev', "socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5";
        push @$cmd, '-mon', "chardev=qmp-event,mode=control";
    }

    push @$cmd, '-pidfile' , PVE::QemuServer::Helpers::pidfile_name($vmid);

    push @$cmd, '-daemonize';

    if ($conf->{smbios1}) {
        my $smbios_conf = parse_smbios1($conf->{smbios1});
        if ($smbios_conf->{base64}) {
            # Do not pass base64 flag to qemu
            delete $smbios_conf->{base64};
            my $smbios_string = "";
            foreach my $key (keys %$smbios_conf) {
                my $value;
                if ($key eq "uuid") {
                    $value = $smbios_conf->{uuid}
                } else {
                    $value = decode_base64($smbios_conf->{$key});
                }
                # qemu accepts any binary data, only commas need escaping by double comma
                $value =~ s/,/,,/g;
                $smbios_string .= "," . $key . "=" . $value if $value;
            }
            push @$cmd, '-smbios', "type=1" . $smbios_string;
        } else {
            push @$cmd, '-smbios', "type=1,$conf->{smbios1}";
        }
    }

    if ($conf->{bios} && $conf->{bios} eq 'ovmf') {
        my $d;
        if (my $efidisk = $conf->{efidisk0}) {
            $d = parse_drive('efidisk0', $efidisk);
        }

        my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch, $d, $q35);
        die "uefi base image '$ovmf_code' not found\n" if ! -f $ovmf_code;

        my ($path, $format);
        my $read_only_str = '';
        if ($d) {
            my ($storeid, $volname) = PVE::Storage::parse_volume_id($d->{file}, 1);
            $format = $d->{format};
            if ($storeid) {
                $path = PVE::Storage::path($storecfg, $d->{file});
                if (!defined($format)) {
                    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
                    $format = qemu_img_format($scfg, $volname);
                }
            } else {
                $path = $d->{file};
                die "efidisk format must be specified\n"
                    if !defined($format);
            }

            $read_only_str = ',readonly=on' if drive_is_read_only($conf, $d);
        } else {
            warn "no efidisk configured! Using temporary efivars disk.\n";
            $path = "/tmp/$vmid-ovmf.fd";
            PVE::Tools::file_copy($ovmf_vars, $path, -s $ovmf_vars);
            $format = 'raw';
        }

        my $size_str = "";

        if ($format eq 'raw' && $version_guard->(4, 1, 2)) {
            $size_str = ",size=" . (-s $ovmf_vars);
        }

        # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
        my $cache = "";
        if ($path =~ m/^rbd:/) {
            $cache = ',cache=writeback';
            $path .= ':rbd_cache_policy=writeback'; # avoid write-around, we *need* to cache writes too
        }

        push @$cmd, '-drive', "if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code";
        push @$cmd, '-drive', "if=pflash,unit=1$cache,format=$format,id=drive-efidisk0$size_str,file=${path}${read_only_str}";
    }

    if ($q35) { # tell QEMU to load q35 config early
        # we use different pcie-port hardware for qemu >= 4.0 for passthrough
        if (min_version($machine_version, 4, 0)) {
            push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35-4.0.cfg';
        } else {
            push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35.cfg';
        }
    }

    if ($conf->{vmgenid}) {
        push @$devices, '-device', 'vmgenid,guid='.$conf->{vmgenid};
    }

    # add usb controllers
    my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers(
        $conf, $bridges, $arch, $machine_type, $usbdesc->{format}, $MAX_USB_DEVICES);
    push @$devices, @usbcontrollers if @usbcontrollers;
    my $vga = parse_vga($conf->{vga});

    my $qxlnum = vga_conf_has_spice($conf->{vga});
    $vga->{type} = 'qxl' if $qxlnum;

    if (!$vga->{type}) {
        if ($arch eq 'aarch64') {
            $vga->{type} = 'virtio';
        } elsif (min_version($machine_version, 2, 9)) {
            $vga->{type} = (!$winversion || $winversion >= 6) ? 'std' : 'cirrus';
        } else {
            $vga->{type} = ($winversion >= 6) ? 'std' : 'cirrus';
        }
    }

    # enable absolute mouse coordinates (needed by vnc)
    my $tablet = $conf->{tablet};
    if (!defined($tablet)) {
        $tablet = $defaults->{tablet};
        $tablet = 0 if $qxlnum; # disable for spice because it is not needed
        $tablet = 0 if $vga->{type} =~ m/^serial\d+$/; # disable if we use serial terminal (no vga card)
    }

    if ($tablet) {
        push @$devices, '-device', print_tabletdevice_full($conf, $arch);
        my $kbd = print_keyboarddevice_full($conf, $arch);
        push @$devices, '-device', $kbd if defined($kbd);
    }

    my $bootorder = device_bootorder($conf);

    # host pci device passthrough
    my ($kvm_off, $gpu_passthrough, $legacy_igd) = PVE::QemuServer::PCI::print_hostpci_devices(
        $vmid, $conf, $devices, $vga, $winversion, $q35, $bridges, $arch, $machine_type, $bootorder);

    # usb devices
    my $usb_dev_features = {};
    $usb_dev_features->{spice_usb3} = 1 if min_version($machine_version, 4, 0);

    my @usbdevices = PVE::QemuServer::USB::get_usb_devices(
        $conf, $usbdesc->{format}, $MAX_USB_DEVICES, $usb_dev_features, $bootorder);
    push @$devices, @usbdevices if @usbdevices;

    # serial devices
    for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
        my $path = $conf->{"serial$i"} or next;
        if ($path eq 'socket') {
            my $socket = "/var/run/qemu-server/${vmid}.serial$i";
            push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server=on,wait=off";
            # On aarch64, serial0 is the UART device. Qemu only allows
            # connecting UART devices via the '-serial' command line, as
            # the device has a fixed slot on the hardware...
            if ($arch eq 'aarch64' && $i == 0) {
                push @$devices, '-serial', "chardev:serial$i";
            } else {
                push @$devices, '-device', "isa-serial,chardev=serial$i";
            }
        } else {
            die "no such serial device\n" if ! -c $path;
            push @$devices, '-chardev', "tty,id=serial$i,path=$path";
            push @$devices, '-device', "isa-serial,chardev=serial$i";
        }
    }

    # parallel devices
    for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
        if (my $path = $conf->{"parallel$i"}) {
            die "no such parallel device\n" if ! -c $path;
            my $devtype = $path =~ m!^/dev/usb/lp! ? 'tty' : 'parport';
            push @$devices, '-chardev', "$devtype,id=parallel$i,path=$path";
            push @$devices, '-device', "isa-parallel,chardev=parallel$i";
        }
    }

    if (min_version($machine_version, 4, 0) && (my $audio = conf_has_audio($conf))) {
        my $audiopciaddr = print_pci_addr("audio0", $bridges, $arch, $machine_type);
        my $audio_devs = audio_devs($audio, $audiopciaddr, $machine_version);
        push @$devices, @$audio_devs;
    }

    add_tpm_device($vmid, $devices, $conf);

    my $sockets = 1;
    $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer used
    $sockets = $conf->{sockets} if $conf->{sockets};

    my $cores = $conf->{cores} || 1;

    my $maxcpus = $sockets * $cores;

    my $vcpus = $conf->{vcpus} ? $conf->{vcpus} : $maxcpus;

    my $allowed_vcpus = $cpuinfo->{cpus};

    die "MAX $allowed_vcpus vcpus allowed per VM on this node\n" if ($allowed_vcpus < $maxcpus);

    if ($hotplug_features->{cpu} && min_version($machine_version, 2, 7)) {
        # start with a single vCPU and add the remaining ones as devices
        push @$cmd, '-smp', "1,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
        for (my $i = 2; $i <= $vcpus; $i++) {
            my $cpustr = print_cpu_device($conf,$i);
            push @$cmd, '-device', $cpustr;
        }

    } else {

        push @$cmd, '-smp', "$vcpus,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
    }
    push @$cmd, '-nodefaults';

    push @$cmd, '-boot', "menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg";

    push @$cmd, '-no-acpi' if defined($conf->{acpi}) && $conf->{acpi} == 0;

    push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;

    if ($vga->{type} && $vga->{type} !~ m/^serial\d+$/ && $vga->{type} ne 'none'){
        push @$devices, '-device', print_vga_device(
            $conf, $vga, $arch, $machine_version, $machine_type, undef, $qxlnum, $bridges);
        my $socket = PVE::QemuServer::Helpers::vnc_socket($vmid);
        push @$cmd, '-vnc', "unix:$socket,password=on";
    } else {
        push @$cmd, '-vga', 'none' if $vga->{type} eq 'none';
        push @$cmd, '-nographic';
    }

    # time drift fix
    my $tdf = defined($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};
    my $useLocaltime = $conf->{localtime};

    if ($winversion >= 5) { # windows
        $useLocaltime = 1 if !defined($conf->{localtime});

        # use time drift fix when acpi is enabled
        if (!(defined($conf->{acpi}) && $conf->{acpi} == 0)) {
            $tdf = 1 if !defined($conf->{tdf});
        }
    }

    if ($winversion >= 6) {
        push @$globalFlags, 'kvm-pit.lost_tick_policy=discard';
        push @$cmd, '-no-hpet';
    }

    push @$rtcFlags, 'driftfix=slew' if $tdf;

    if ($conf->{startdate} && $conf->{startdate} ne 'now') {
        push @$rtcFlags, "base=$conf->{startdate}";
    } elsif ($useLocaltime) {
        push @$rtcFlags, 'base=localtime';
    }

    if ($forcecpu) {
        push @$cmd, '-cpu', $forcecpu;
    } else {
        push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
    }

    PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd);

    push @$cmd, '-S' if $conf->{freeze};

    push @$cmd, '-k', $conf->{keyboard} if defined($conf->{keyboard});

    my $guest_agent = parse_guest_agent($conf);

    if ($guest_agent->{enabled}) {
        my $qgasocket = PVE::QemuServer::Helpers::qmp_socket($vmid, 1);
        push @$devices, '-chardev', "socket,path=$qgasocket,server=on,wait=off,id=qga0";

        if (!$guest_agent->{type} || $guest_agent->{type} eq 'virtio') {
            my $pciaddr = print_pci_addr("qga0", $bridges, $arch, $machine_type);
            push @$devices, '-device', "virtio-serial,id=qga0$pciaddr";
            push @$devices, '-device', 'virtserialport,chardev=qga0,name=org.qemu.guest_agent.0';
        } elsif ($guest_agent->{type} eq 'isa') {
            push @$devices, '-device', "isa-serial,chardev=qga0";
        }
    }

    my $rng = $conf->{rng0} ? parse_rng($conf->{rng0}) : undef;
    if ($rng && $version_guard->(4, 1, 2)) {
        check_rng_source($rng->{source});

        my $max_bytes = $rng->{max_bytes} // $rng_fmt->{max_bytes}->{default};
        my $period = $rng->{period} // $rng_fmt->{period}->{default};
        my $limiter_str = "";
        if ($max_bytes) {
            $limiter_str = ",max-bytes=$max_bytes,period=$period";
        }

        my $rng_addr = print_pci_addr("rng0", $bridges, $arch, $machine_type);
        push @$devices, '-object', "rng-random,filename=$rng->{source},id=rng0";
        push @$devices, '-device', "virtio-rng-pci,rng=rng0$limiter_str$rng_addr";
    }

    my $spice_port;

    if ($qxlnum) {
        if ($qxlnum > 1) {
            if ($winversion){
                for (my $i = 1; $i < $qxlnum; $i++){
                    push @$devices, '-device', print_vga_device(
                        $conf, $vga, $arch, $machine_version, $machine_type, $i, $qxlnum, $bridges);
                }
            } else {
                # assume other OS works like Linux
                my ($ram, $vram) = ("134217728", "67108864");
                if ($vga->{memory}) {
                    $ram = PVE::Tools::convert_size($qxlnum*4*$vga->{memory}, 'mb' => 'b');
                    $vram = PVE::Tools::convert_size($qxlnum*2*$vga->{memory}, 'mb' => 'b');
                }
                push @$cmd, '-global', "qxl-vga.ram_size=$ram";
                push @$cmd, '-global', "qxl-vga.vram_size=$vram";
            }
        }

        my $pciaddr = print_pci_addr("spice", $bridges, $arch, $machine_type);

        my $pfamily = PVE::Tools::get_host_address_family($nodename);
        my @nodeaddrs = PVE::Tools::getaddrinfo_all('localhost', family => $pfamily);
        die "failed to get an ip address of type $pfamily for 'localhost'\n" if !@nodeaddrs;

        push @$devices, '-device', "virtio-serial,id=spice$pciaddr";
        push @$devices, '-chardev', "spicevmc,id=vdagent,name=vdagent";
        push @$devices, '-device', "virtserialport,chardev=vdagent,name=com.redhat.spice.0";

        my $localhost = PVE::Network::addr_to_ip($nodeaddrs[0]->{addr});
        $spice_port = PVE::Tools::next_spice_port($pfamily, $localhost);

        my $spice_enhancement_str = $conf->{spice_enhancements} // '';
        my $spice_enhancement = parse_property_string($spice_enhancements_fmt, $spice_enhancement_str);
        if ($spice_enhancement->{foldersharing}) {
            push @$devices, '-chardev', "spiceport,id=foldershare,name=org.spice-space.webdav.0";
            push @$devices, '-device', "virtserialport,chardev=foldershare,name=org.spice-space.webdav.0";
        }

        my $spice_opts = "tls-port=${spice_port},addr=$localhost,tls-ciphers=HIGH,seamless-migration=on";
        $spice_opts .= ",streaming-video=$spice_enhancement->{videostreaming}"
            if $spice_enhancement->{videostreaming};

        push @$devices, '-spice', "$spice_opts";
    }

    # enable balloon by default, unless explicitly disabled
    if (!defined($conf->{balloon}) || $conf->{balloon}) {
        my $pciaddr = print_pci_addr("balloon0", $bridges, $arch, $machine_type);
        push @$devices, '-device', "virtio-balloon-pci,id=balloon0$pciaddr";
    }

    if ($conf->{watchdog}) {
        my $wdopts = parse_watchdog($conf->{watchdog});
        my $pciaddr = print_pci_addr("watchdog", $bridges, $arch, $machine_type);
        my $watchdog = $wdopts->{model} || 'i6300esb';
        push @$devices, '-device', "$watchdog$pciaddr";
        push @$devices, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
    }

    my $vollist = [];
    my $scsicontroller = {};
    my $ahcicontroller = {};
    my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : $defaults->{scsihw};

    # Add iscsi initiator name if available
    if (my $initiator = get_initiator_name()) {
        push @$devices, '-iscsi', "initiator-name=$initiator";
    }

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        if (PVE::Storage::parse_volume_id($drive->{file}, 1)) {
            check_volume_storage_type($storecfg, $drive->{file});
            push @$vollist, $drive->{file};
        }

        # ignore efidisk here, already added in bios/fw handling code above
        return if $drive->{interface} eq 'efidisk';
        # similar for TPM
        return if $drive->{interface} eq 'tpmstate';

        $use_virtio = 1 if $ds =~ m/^virtio/;

        $drive->{bootindex} = $bootorder->{$ds} if $bootorder->{$ds};

        if ($drive->{interface} eq 'virtio'){
            push @$cmd, '-object', "iothread,id=iothread-$ds" if $drive->{iothread};
        }

        if ($drive->{interface} eq 'scsi') {

            my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);

            die "scsi$drive->{index}: machine version 4.1~pve2 or higher is required to use more than 14 SCSI disks\n"
                if $drive->{index} > 13 && !$version_guard->(4, 1, 2);

            my $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges, $arch, $machine_type);
            my $scsihw_type = $scsihw =~ m/^virtio-scsi-single/ ? "virtio-scsi-pci" : $scsihw;

            my $iothread = '';
            if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{iothread}){
                $iothread .= ",iothread=iothread-$controller_prefix$controller";
                push @$cmd, '-object', "iothread,id=iothread-$controller_prefix$controller";
            } elsif ($drive->{iothread}) {
                warn "iothread is only valid with virtio disk or virtio-scsi-single controller, ignoring\n";
            }

            my $queues = '';
            if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{queues}){
                $queues = ",num_queues=$drive->{queues}";
            }

            # every controller is only added once, by the first disk using it
            push @$devices, '-device', "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues"
                if !$scsicontroller->{$controller};
            $scsicontroller->{$controller}=1;
        }

        if ($drive->{interface} eq 'sata') {
            my $controller = int($drive->{index} / $PVE::QemuServer::Drive::MAX_SATA_DISKS);
            my $pciaddr = print_pci_addr("ahci$controller", $bridges, $arch, $machine_type);
            push @$devices, '-device', "ahci,id=ahci$controller,multifunction=on$pciaddr"
                if !$ahcicontroller->{$controller};
            $ahcicontroller->{$controller}=1;
        }

        my $pbs_conf = $pbs_backing->{$ds};
        my $pbs_name = undef;
        if ($pbs_conf) {
            $pbs_name = "drive-$ds-pbs";
            push @$devices, '-blockdev', print_pbs_blockdev($pbs_conf, $pbs_name);
        }

        my $drive_cmd = print_drive_commandline_full(
            $storecfg, $vmid, $drive, $pbs_name, min_version($kvmver, 6, 0));

        # extra protection for templates, but SATA and IDE don't support it..
        $drive_cmd .= ',readonly=on' if drive_is_read_only($conf, $drive);

        push @$devices, '-drive',$drive_cmd;
        push @$devices, '-device', print_drivedevice_full(
            $storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type);
    });

    for (my $i = 0; $i < $MAX_NETS; $i++) {
        my $netname = "net$i";

        next if !$conf->{$netname};
        my $d = parse_net($conf->{$netname});
        next if !$d;

        $use_virtio = 1 if $d->{model} eq 'virtio';

        $d->{bootindex} = $bootorder->{$netname} if $bootorder->{$netname};

        my $netdevfull = print_netdev_full($vmid, $conf, $arch, $d, $netname);
        push @$devices, '-netdev', $netdevfull;

        my $netdevicefull = print_netdevice_full(
            $vmid, $conf, $d, $netname, $bridges, $use_old_bios_files, $arch, $machine_type);

        push @$devices, '-device', $netdevicefull;
    }

    if ($conf->{ivshmem}) {
        my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});

        my $bus;
        if ($q35) {
            $bus = print_pcie_addr("ivshmem");
        } else {
            $bus = print_pci_addr("ivshmem", $bridges, $arch, $machine_type);
        }

        my $ivshmem_name = $ivshmem->{name} // $vmid;
        my $path = '/dev/shm/pve-shm-' . $ivshmem_name;

        push @$devices, '-device', "ivshmem-plain,memdev=ivshmem$bus,";
        push @$devices, '-object', "memory-backend-file,id=ivshmem,share=on,mem-path=$path"
            .",size=$ivshmem->{size}M";
    }

    # pci.4 is nested in pci.1
    $bridges->{1} = 1 if $bridges->{4};

    if (!$q35) { # add pci bridges
        if (min_version($machine_version, 2, 3)) {
            $bridges->{1} = 1;
            $bridges->{2} = 1;
        }
        $bridges->{3} = 1 if $scsihw =~ m/^virtio-scsi-single/;
    }

    for my $k (sort {$b cmp $a} keys %$bridges) {
        next if $q35 && $k < 4; # q35.cfg already includes bridges up to 3

        my $k_name = $k;
        if ($k == 2 && $legacy_igd) {
            $k_name = "$k-igd";
        }
        my $pciaddr = print_pci_addr("pci.$k_name", undef, $arch, $machine_type);
        my $devstr = "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr";

        if ($q35) { # add after -readconfig pve-q35.cfg
            splice @$devices, 2, 0, '-device', $devstr;
        } else {
            unshift @$devices, '-device', $devstr if $k > 0;
        }
    }

    if (!$kvm) {
        push @$machineFlags, 'accel=tcg';
    }

    # only request the pve-version that is actually needed by the used features
    my $machine_type_min = $machine_type;
    if ($add_pve_version) {
        $machine_type_min =~ s/\+pve\d+$//;
        $machine_type_min .= "+pve$required_pve_version";
    }
    push @$machineFlags, "type=${machine_type_min}";

    push @$cmd, @$devices;
    push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
    push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
    push @$cmd, '-global', join(',', @$globalFlags) if scalar(@$globalFlags);

    if (my $vmstate = $conf->{vmstate}) {
        my $statepath = PVE::Storage::path($storecfg, $vmstate);
        push @$vollist, $vmstate;
        push @$cmd, '-loadstate', $statepath;
        print "activating and using '$vmstate' as vmstate\n";
    }

    if (PVE::QemuConfig->is_template($conf)) {
        # needed to workaround base volumes being read-only
        push @$cmd, '-snapshot';
    }

    # add custom args
    if ($conf->{args}) {
        my $aa = PVE::Tools::split_args($conf->{args});
        push @$cmd, @$aa;
    }

    return wantarray ? ($cmd, $vollist, $spice_port) : $cmd;
}
3953
# Verifies that $source can be used as backend of a VirtIO RNG device.
#
# Dies if $source does not exist, or if it is '/dev/hwrng' while the first
# line of the hw_random 'rng_current' sysfs file reads 'none'.
sub check_rng_source {
    my ($source) = @_;

    # mostly relevant for /dev/hwrng, but doesn't hurt to check others too
    if (!-e $source) {
        die "cannot create VirtIO RNG device: source file '$source' doesn't exist\n";
    }

    my $rng_current = '/sys/devices/virtual/misc/hw_random/rng_current';
    my $is_hwrng = $source eq '/dev/hwrng';

    if ($is_hwrng && file_read_firstline($rng_current) eq 'none') {
        # Needs to abort, otherwise QEMU crashes on first rng access. Note that rng_current cannot
        # be changed to 'none' manually, so once the VM is past this point, it's no longer an issue.
        die "Cannot start VM with passed-through RNG device: '/dev/hwrng' exists, but"
            ." '$rng_current' is set to 'none'. Ensure that a compatible hardware-RNG is attached"
            ." to the host.\n";
    }
}
3970
# Returns the port reported by the 'query-spice' monitor command of VM $vmid,
# preferring 'tls-port' over 'port'. Dies if neither is set.
sub spice_port {
    my ($vmid) = @_;

    my $info = mon_cmd($vmid, 'query-spice');

    my $port = $info->{'tls-port'} || $info->{'port'};
    die "no spice port\n" if !$port;

    return $port;
}
3978
# Collects the IDs of the devices currently present in VM $vmid using monitor
# commands.
#
# Returns a hash reference keyed by device ID. It contains the qdev IDs of all
# PCI devices (for bridges, the number of devices directly behind the bridge
# is added to the value), the name of every block device called 'drive-<name>',
# 'tablet' if a 'QEMU HID Tablet' mouse is reported, and every 'usb<N>' entry
# below /machine/peripheral.
sub vm_devices_list {
    my ($vmid) = @_;

    my $devices = {};

    my $pci_buses = mon_cmd($vmid, 'query-pci');
    my @pending = map { @{$_->{devices}} } @$pci_buses;

    # walk the PCI hierarchy level by level, descending into bridges
    while (@pending) {
        my @children;
        for my $dev (@pending) {
            $devices->{$dev->{'qdev_id'}} = 1 if $dev->{'qdev_id'};

            my $bridge = $dev->{'pci_bridge'};
            next if !$bridge;

            $devices->{$dev->{'qdev_id'}} += scalar(@{$bridge->{devices}});
            push @children, @{$bridge->{devices}};
        }
        @pending = @children;
    }

    my $blocks = mon_cmd($vmid, 'query-block');
    for my $block (@$blocks) {
        next if $block->{device} !~ m/^drive-(\S+)/;
        $devices->{$1} = 1;
    }

    my $mice = mon_cmd($vmid, 'query-mice');
    for my $mouse (@$mice) {
        next if $mouse->{name} ne 'QEMU HID Tablet';
        $devices->{tablet} = 1;
        last;
    }

    # for usb devices there is no query-usb
    # but we can iterate over the entries in
    # qom-list path=/machine/peripheral
    my $peripherals = mon_cmd($vmid, 'qom-list', path => '/machine/peripheral');
    for my $peripheral (@$peripherals) {
        my $name = $peripheral->{name};
        $devices->{$name} = 1 if $name =~ m/^usb\d+$/;
    }

    return $devices;
}
4028
# Hot-plug a single device into the running VM.
#
# $deviceid selects what gets plugged (tablet, keyboard, virtioN, scsiN,
# virtioscsiN/scsihwN, netN or - on non-q35 machines - a pci.N bridge) and
# $device carries the parsed configuration of that device.
#
# Returns 1 once the device is present (also when it already was) and an
# empty value when qemu_netdevadd() returns false for a network device.
# Dies for unsupported device IDs and on hotplug errors.
sub vm_deviceplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    my $is_q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # already plugged - nothing to do
    my $present = vm_devices_list($vmid);
    return 1 if defined($present->{$deviceid});

    # add PCI bridge if we need it for the device
    qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid, $arch, $machine_type);

    # drop the drive backend again after a failed disk hotplug; problems
    # during that cleanup are only warned about
    my $remove_drive = sub {
        eval { qemu_drivedel($vmid, $deviceid); };
        warn $@ if $@;
    };

    if ($deviceid eq 'tablet') {
        qemu_deviceadd($vmid, print_tabletdevice_full($conf, $arch));
    } elsif ($deviceid eq 'keyboard') {
        qemu_deviceadd($vmid, print_keyboarddevice_full($conf, $arch));
    } elsif ($deviceid =~ m/^usb(\d+)$/) {
        die "usb hotplug currently not reliable\n";
        # since we can't reliably hot unplug all added usb devices and usb
        # passthrough breaks live migration we disable usb hotplugging for now
        #qemu_deviceadd($vmid, PVE::QemuServer::USB::print_usbdevice_full($conf, $deviceid, $device));
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        qemu_iothread_add($vmid, $deviceid, $device);

        qemu_driveadd($storecfg, $vmid, $device);
        my $disk_device = print_drivedevice_full(
            $storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);

        qemu_deviceadd($vmid, $disk_device);
        eval { qemu_deviceaddverify($vmid, $deviceid); };
        if (my $err = $@) {
            $remove_drive->();
            die $err;
        }
    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        my $scsihw = $conf->{scsihw} // "lsi";
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $scsihw_type = $scsihw eq 'virtio-scsi-single' ? "virtio-scsi-pci" : $scsihw;

        my $controller_device = "$scsihw_type,id=$deviceid$pciaddr";

        # iothread and queue settings only apply to virtioscsiN controllers
        if ($deviceid =~ m/^virtioscsi(\d+)$/) {
            if ($device->{iothread}) {
                qemu_iothread_add($vmid, $deviceid, $device);
                $controller_device .= ",iothread=iothread-$deviceid";
            }
            if ($device->{queues}) {
                $controller_device .= ",num_queues=$device->{queues}";
            }
        }

        qemu_deviceadd($vmid, $controller_device);
        qemu_deviceaddverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        qemu_findorcreatescsihw($storecfg, $conf, $vmid, $device, $arch, $machine_type);
        qemu_driveadd($storecfg, $vmid, $device);

        my $disk_device = print_drivedevice_full(
            $storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);
        eval { qemu_deviceadd($vmid, $disk_device); };
        if (my $err = $@) {
            $remove_drive->();
            die $err;
        }
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        return if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid);

        # network devices use the machine type returned by qemu_machine_pxe(),
        # possibly adjusted by qemu_use_old_bios_files()
        my $net_machine_type = PVE::QemuServer::Machine::qemu_machine_pxe($vmid, $conf);
        my $use_old_bios_files;
        ($use_old_bios_files, $net_machine_type) = qemu_use_old_bios_files($net_machine_type);

        my $net_device = print_netdevice_full(
            $vmid, $conf, $device, $deviceid, undef, $use_old_bios_files, $arch, $net_machine_type);
        qemu_deviceadd($vmid, $net_device);
        eval {
            qemu_deviceaddverify($vmid, $deviceid);
            qemu_set_link_status($vmid, $deviceid, !$device->{link_down});
        };
        if (my $err = $@) {
            # remove the netdev backend again, cleanup problems are only warned about
            eval { qemu_netdevdel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif (!$is_q35 && $deviceid =~ m/^(pci\.)(\d+)$/) {
        my $bridgeid = $2;
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $bridge_device = "pci-bridge,id=pci.$bridgeid,chassis_nr=$bridgeid$pciaddr";

        qemu_deviceadd($vmid, $bridge_device);
        qemu_deviceaddverify($vmid, $deviceid);
    } else {
        die "can't hotplug device '$deviceid'\n";
    }

    return 1;
}
4123
# fixme: this should raise exceptions on error!
#
# Hot-unplug the device $deviceid from the running VM.
#
# Returns 1 when the device is gone (also when it was not plugged at all).
# Dies when asked to unplug a boot disk or an unsupported device ID.
sub vm_deviceunplug {
    my ($vmid, $conf, $deviceid) = @_;

    # not plugged - nothing to do
    my $present = vm_devices_list($vmid);
    return 1 if !defined($present->{$deviceid});

    my $bootdisks = PVE::QemuServer::Drive::get_bootdisks($conf);
    die "can't unplug bootdisk '$deviceid'\n" if grep { $_ eq $deviceid } @$bootdisks;

    if ($deviceid eq 'tablet' || $deviceid eq 'keyboard') {
        qemu_devicedel($vmid, $deviceid);
    } elsif ($deviceid =~ m/^usb\d+$/) {
        die "usb hotplug currently not reliable\n";
        # when unplugging usb devices this way, there may be remaining usb
        # controllers/hubs so we disable it for now
        #qemu_devicedel($vmid, $deviceid);
        #qemu_devicedelverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_iothread_del($vmid, $deviceid, $drive);
    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        qemu_devicedel($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_deletescsihw($conf, $vmid, $deviceid);

        # with virtio-scsi-single the iothread is named after the
        # virtioscsi<index> controller of the disk
        if ($conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single')) {
            qemu_iothread_del($vmid, "virtioscsi$drive->{index}", $drive);
        }
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_netdevdel($vmid, $deviceid);
    } else {
        die "can't unplug device '$deviceid'\n";
    }

    return 1;
}
4171
# Issue 'device_add' for the device string $devicefull
# ("<driver>,key=value,...").
sub qemu_deviceadd {
    my ($vmid, $devicefull) = @_;

    # the leading, unnamed element of the device string is the driver; the
    # string is split on '=' and ',' into the key/value pairs for the command
    my %device_opts = split(/[=,]/, "driver=" . $devicefull);

    mon_cmd($vmid, "device_add", %device_opts);
}
4180
# Issue 'device_del' for the device with ID $deviceid and hand back the
# result of the monitor command.
sub qemu_devicedel {
    my ($vmid, $deviceid) = @_;

    my $result = mon_cmd($vmid, "device_del", id => $deviceid);

    return $result;
}
4186
# Create the object "iothread-$deviceid" if $device has 'iothread' set and
# the VM does not list such an iothread yet.
sub qemu_iothread_add {
    my ($vmid, $deviceid, $device) = @_;

    if ($device->{iothread}) {
        my $object_id = "iothread-$deviceid";
        my $existing = vm_iothreads_list($vmid);
        qemu_objectadd($vmid, $object_id, "iothread") if !$existing->{$object_id};
    }
}
4195
# Delete the object "iothread-$deviceid" if $device has 'iothread' set and
# the VM currently lists such an iothread.
sub qemu_iothread_del {
    my ($vmid, $deviceid, $device) = @_;

    if ($device->{iothread}) {
        my $object_id = "iothread-$deviceid";
        my $existing = vm_iothreads_list($vmid);
        qemu_objectdel($vmid, $object_id) if $existing->{$object_id};
    }
}
4204
# Issue 'object-add' for an object of type $qomtype with ID $objectid.
# Returns 1.
sub qemu_objectadd {
    my ($vmid, $objectid, $qomtype) = @_;

    my %object = (
        id => $objectid,
        "qom-type" => $qomtype,
    );
    mon_cmd($vmid, "object-add", id => $object{id}, "qom-type" => $object{"qom-type"});

    return 1;
}
4212
# Issue 'object-del' for the object with ID $objectid. Returns 1.
sub qemu_objectdel {
    my ($vmid, $objectid) = @_;

    my @del_args = (id => $objectid);
    mon_cmd($vmid, "object-del", @del_args);

    return 1;
}
4220
# Add the drive backend for $device to the running VM via the HMP command
# 'drive_add'. Returns 1 on success, dies with the monitor output otherwise.
sub qemu_driveadd {
    my ($storecfg, $vmid, $device) = @_;

    # the result of the 6.0 version check is handed to the drive printer
    my $running_version = get_running_qemu_version($vmid);
    my $io_uring = min_version($running_version, 6, 0);

    my $drive_opts = print_drive_commandline_full($storecfg, $vmid, $device, undef, $io_uring);
    # double every backslash before embedding the options in the quoted HMP argument
    $drive_opts =~ s/\\/\\\\/g;

    my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_add auto \"$drive_opts\"");

    # If the command succeeds qemu prints: "OK"
    die "adding drive failed: $output\n" if $output !~ m/OK/s;

    return 1;
}
4235
# Remove the drive backend "drive-$deviceid" via the HMP command 'drive_del'.
# Returns 1 on success (or if the drive is already gone), dies otherwise.
sub qemu_drivedel {
    my ($vmid, $deviceid) = @_;

    my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_del drive-$deviceid");
    $output =~ s/^\s+//;

    # empty output means success
    # NB: device not found errors mean the drive was auto-deleted and we ignore the error
    if ($output eq "" || $output =~ m/Device \'.*?\' not found/s) {
        return 1;
    }

    die "deleting drive $deviceid failed : $output\n";
}
4249
# Poll the device list until $deviceid shows up. Checks up to six times with
# a one second pause after each failed check; dies if the device never appears.
sub qemu_deviceaddverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if defined($present->{$deviceid});
        sleep 1;
    }

    die "error on hotplug device '$deviceid'\n";
}
4261
4262
# Poll the device list until $deviceid has disappeared. Checks up to six
# times with a one second pause after each failed check; dies if the device
# is still present afterwards.
sub qemu_devicedelverify {
    my ($vmid, $deviceid) = @_;

    # need to verify that the device is correctly removed as device_del
    # is async and empty return is not reliable
    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if !defined($present->{$deviceid});
        sleep 1;
    }

    die "error on hot-unplugging device '$deviceid'\n";
}
4277
# Make sure the SCSI controller that scsihw_infos() assigns to $device is
# plugged into the VM, hot-plugging it if it is missing. Returns 1.
sub qemu_findorcreatescsihw {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my (undef, $controller, $prefix) = scsihw_infos($conf, $device);
    my $controller_id = "$prefix$controller";

    my $present = vm_devices_list($vmid);
    vm_deviceplug($storecfg, $conf, $vmid, $controller_id, $device, $arch, $machine_type)
        if !defined($present->{$controller_id});

    return 1;
}
4292
# Unplug the SCSI controller belonging to the drive $opt, unless the check
# below finds another SCSI drive that keeps it in place. Returns 1.
sub qemu_deletescsihw {
    my ($conf, $vmid, $opt) = @_;

    my $device = parse_drive($opt, $conf->{$opt});

    # virtio-scsi-single: unplug the virtioscsi<index> controller of this drive
    if ($conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single')) {
        vm_deviceunplug($vmid, $conf, "virtioscsi$device->{index}");
        return 1;
    }

    my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $device);
    my $index_limit = ($maxdev - 1) * ($controller + 1);

    # keep the controller if a plugged SCSI drive has an index below the limit
    my $present = vm_devices_list($vmid);
    foreach my $devid (keys %{$present}) {
        next if !is_valid_drivename($devid);

        my $drive = parse_drive($devid, $conf->{$devid});
        return 1 if $drive->{interface} eq 'scsi' && $drive->{index} < $index_limit;
    }

    vm_deviceunplug($vmid, $conf, "scsihw$controller");

    return 1;
}
4321
# Hot-plug the PCI bridge that the device ID $device needs, if any.
#
# print_pci_addr() records the required bridge in $bridges. Nothing is done
# when no bridge (or only one with an ID below 1) is recorded, or when the
# bridge 'pci.<id>' is already present in the VM. Returns 1.
sub qemu_add_pci_bridge {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my $bridges = {};

    my $bridgeid;

    # called for its side effect of filling $bridges
    print_pci_addr($device, $bridges, $arch, $machine_type);

    while (my ($k, $v) = each %$bridges) {
        $bridgeid = $k;
    }
    return 1 if !defined($bridgeid) || $bridgeid < 1;

    my $bridge = "pci.$bridgeid";
    my $devices_list = vm_devices_list($vmid);

    if (!defined($devices_list->{$bridge})) {
        # vm_deviceplug() expects a device description before $arch; pass an
        # empty one so that $arch and $machine_type end up in the right slots
        vm_deviceplug($storecfg, $conf, $vmid, $bridge, {}, $arch, $machine_type);
    }

    return 1;
}
4345
# Set the link state of the network device $device via 'set_link': up when
# $up is true, down otherwise.
sub qemu_set_link_status {
    my ($vmid, $device, $up) = @_;

    my $link_state = $up ? JSON::true : JSON::false;

    mon_cmd($vmid, "set_link", name => $device, up => $link_state);
}
4352
# Add the network backend for $deviceid to the running VM via 'netdev_add'.
# Returns 1.
sub qemu_netdevadd {
    my ($vmid, $conf, $arch, $device, $deviceid) = @_;

    # split the netdev string on '=' and ',' into the key/value pairs for the command
    my $netdev_string = print_netdev_full($vmid, $conf, $arch, $device, $deviceid, 1);
    my %netdev_opts = split(/[=,]/, $netdev_string);

    # convert the string values of 'vhost' and 'queues' to a JSON boolean
    # and a number before sending them
    my $vhost = $netdev_opts{vhost};
    if (defined($vhost)) {
        $netdev_opts{vhost} = JSON::boolean(PVE::JSONSchema::parse_boolean($vhost));
    }

    my $queues = $netdev_opts{queues};
    if (defined($queues)) {
        $netdev_opts{queues} = $queues + 0;
    }

    mon_cmd($vmid, "netdev_add", %netdev_opts);

    return 1;
}
4370
# Remove the network backend with ID $deviceid via 'netdev_del'.
sub qemu_netdevdel {
    my ($vmid, $deviceid) = @_;

    my @del_args = (id => $deviceid);

    mon_cmd($vmid, "netdev_del", @del_args);
}
4376
# Replace the USB device $deviceid of the running VM with the one described
# by $device: unplug the old device, add an xhci controller if a usb3 device
# needs one and none is present, then plug the new device.
# Does nothing when $device is false.
sub qemu_usb_hotplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    return if !$device;

    # remove the old one first
    vm_deviceunplug($vmid, $conf, $deviceid);

    # check if xhci controller is necessary and available
    if ($device->{usb3} && !vm_devices_list($vmid)->{xhci}) {
        my $pciaddr = print_pci_addr("xhci", undef, $arch, $machine_type);
        qemu_deviceadd($vmid, "nec-usb-xhci,id=xhci$pciaddr");
    }

    my $usb_device = parse_usb_device($device->{host});
    $usb_device->{usb3} = $device->{usb3};

    # add the new one
    vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $usb_device, $arch, $machine_type);
}
4401
# Change the number of vCPUs of the running VM to $vcpus (all configured
# sockets * cores if $vcpus is not set).
#
# CPUs are plugged or unplugged one at a time; after each successful step
# $conf->{vcpus} is updated and the configuration is written out.
# Dies if more vCPUs than sockets * cores are requested, if the running VM
# does not match its configuration, or if unplugging is requested on a
# machine version older than 2.7.
sub qemu_cpu_hotplug {
    my ($vmid, $conf, $vcpus) = @_;

    my $machine_type = PVE::QemuServer::Machine::get_current_qemu_machine($vmid);

    # 'smp' is the old style setting (no longer used), 'sockets' wins over it
    my $sockets = $conf->{sockets} || $conf->{smp} || 1;
    my $cores = $conf->{cores} || 1;
    my $maxcpus = $sockets * $cores;

    $vcpus ||= $maxcpus;

    die "you can't add more vcpus than maxcpus\n"
        if $vcpus > $maxcpus;

    my $currentvcpus = $conf->{vcpus} || $maxcpus;

    if ($vcpus < $currentvcpus) {
        die "cpu hot-unplugging requires qemu version 2.7 or higher\n"
            if !PVE::QemuServer::Machine::machine_version($machine_type, 2, 7);

        for (my $cpu = $currentvcpus; $cpu > $vcpus; $cpu--) {
            qemu_devicedel($vmid, "cpu$cpu");

            # wait until the guest reports one CPU less
            my $tries = 0;
            my $running_cpus;
            while (1) {
                $running_cpus = mon_cmd($vmid, "query-cpus-fast");
                last if scalar(@{$running_cpus}) == $cpu - 1;
                raise_param_exc({ vcpus => "error unplugging cpu$cpu" }) if $tries > 5;
                $tries++;
                sleep 1;
            }

            # update conf after each successful cpu unplug
            $conf->{vcpus} = scalar(@{$running_cpus});
            PVE::QemuConfig->write_config($vmid, $conf);
        }

        return;
    }

    my $running_before = mon_cmd($vmid, "query-cpus-fast");
    die "vcpus in running vm does not match its configuration\n"
        if scalar(@{$running_before}) != $currentvcpus;

    if (PVE::QemuServer::Machine::machine_version($machine_type, 2, 7)) {
        for (my $cpu = $currentvcpus + 1; $cpu <= $vcpus; $cpu++) {
            my $cpu_device = print_cpu_device($conf, $cpu);
            qemu_deviceadd($vmid, $cpu_device);

            # wait until the guest reports the new CPU
            my $tries = 0;
            my $running_cpus;
            while (1) {
                $running_cpus = mon_cmd($vmid, "query-cpus-fast");
                last if scalar(@{$running_cpus}) == $cpu;
                raise_param_exc({ vcpus => "error hotplugging cpu$cpu" }) if $tries > 10;
                sleep 1;
                $tries++;
            }

            # update conf after each successful cpu hotplug
            $conf->{vcpus} = scalar(@{$running_cpus});
            PVE::QemuConfig->write_config($vmid, $conf);
        }
    } else {
        # machine versions before 2.7 use the 'cpu-add' command instead
        for (my $cpu = $currentvcpus; $cpu < $vcpus; $cpu++) {
            mon_cmd($vmid, "cpu-add", id => int($cpu));
        }
    }
}
4476
# Apply I/O throttling limits to the block device $deviceid of a running VM
# via 'block_set_io_throttle'. Does nothing if the VM is not running.
#
# After $vmid and $deviceid the limits are expected positionally, in the
# order given by @limit_names below; each one is sent as an integer.
sub qemu_block_set_io_throttle {
    my ($vmid, $deviceid, @limits) = @_;

    my @limit_names = qw(
        bps bps_rd bps_wr iops iops_rd iops_wr
        bps_max bps_rd_max bps_wr_max iops_max iops_rd_max iops_wr_max
        bps_max_length bps_rd_max_length bps_wr_max_length
        iops_max_length iops_rd_max_length iops_wr_max_length
    );

    return if !check_running($vmid);

    # pair every limit name with the integer value of its positional argument
    my @throttle_args = map { ($limit_names[$_], int($limits[$_])) } 0 .. $#limit_names;

    mon_cmd($vmid, "block_set_io_throttle", device => $deviceid, @throttle_args);
}
4508
# Resize the volume $volid on the storage layer and, if the VM is running,
# tell QEMU about the new size via 'block_resize'.
sub qemu_block_resize {
    my ($vmid, $deviceid, $storecfg, $volid, $size) = @_;

    my $is_running = check_running($vmid);

    # a false result from the storage layer resets the size passed on to 0
    my $resized = PVE::Storage::volume_resize($storecfg, $volid, $size, $is_running);
    $size = 0 if !$resized;

    return if !$is_running;

    # round up to the next multiple of 1024
    $size += (1024 - $size % 1024) % 1024;

    mon_cmd(
        $vmid,
        "block_resize",
        device => $deviceid,
        size => int($size),
        timeout => 60,
    );
}
4529
# Create the snapshot $snap of volume $volid. A running VM takes an internal
# snapshot through QEMU when do_snapshots_with_qemu() says so; in every
# other case the storage layer creates the snapshot.
sub qemu_volume_snapshot {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $via_qemu = check_running($vmid) && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    if ($via_qemu) {
        mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap);
    } else {
        PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
    }
}
4541
# Delete the snapshot $snap of volume $volid. The snapshot is removed
# through QEMU if the VM is running, the volume is referenced by the current
# configuration and do_snapshots_with_qemu() says so; otherwise the storage
# layer deletes it.
sub qemu_volume_snapshot_delete {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $in_use = check_running($vmid);

    if ($in_use) {
        # a running VM only matters if it actually uses this volume
        $in_use = undef;

        my $current_conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->foreach_volume($current_conf, sub {
            my ($ds, $drive) = @_;
            $in_use = 1 if $drive->{file} eq $volid;
        });
    }

    if ($in_use && do_snapshots_with_qemu($storecfg, $volid, $deviceid)) {
        mon_cmd($vmid, 'blockdev-snapshot-delete-internal-sync', device => $deviceid, name => $snap);
    } else {
        PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $in_use);
    }
}
4563
# Set the state of every migration capability reported by
# 'query-migrate-capabilities': the ones marked with 1 below are enabled,
# all others (including unlisted ones) are disabled.
#
# 'dirty-bitmaps' is enabled only if 'query-proxmox-support' reports the
# matching property, which depends on $savevm.
sub set_migration_caps {
    my ($vmid, $savevm) = @_;

    # errors of this query are ignored, the property then counts as unsupported
    my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };

    my $bitmap_prop = $savevm ? 'pbs-dirty-bitmap-savevm' : 'pbs-dirty-bitmap-migration';
    my $dirty_bitmaps = $qemu_support->{$bitmap_prop} ? 1 : 0;

    my $wanted_state = {
        "auto-converge" => 1,
        "xbzrle" => 1,
        "x-rdma-pin-all" => 0,
        "zero-blocks" => 0,
        "compress" => 0,
        "dirty-bitmaps" => $dirty_bitmaps,
    };

    my $supported = mon_cmd($vmid, "query-migrate-capabilities");

    my $capabilities = [];
    for my $entry (@$supported) {
        my $name = $entry->{capability};
        push @$capabilities, {
            capability => $name,
            state => $wanted_state->{$name} ? JSON::true : JSON::false,
        };
    }

    mon_cmd($vmid, "migrate-set-capabilities", capabilities => $capabilities);
}
4594
# Call $func->($volid, $attributes, @param) once for every volume referenced
# by $conf or by one of its snapshots.
#
# $attributes is a hash with the flags cdrom, replicate, shared,
# referenced_in_config, is_vmstate, is_tpmstate and is_unused, plus - when
# applicable - referenced_in_snapshot, size and drivename. The flags are
# merged over all references to the same volume.
sub foreach_volid {
    my ($conf, $func, @param) = @_;

    my $volhash = {};

    my $record_volume = sub {
        my ($key, $drive, $snapname) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $info = ($volhash->{$volid} //= {});

        # stays 1 only if no reference to the volume is a non-cdrom drive
        $info->{cdrom} //= 1;
        $info->{cdrom} = 0 if !drive_is_cdrom($drive);

        # 'replicate' defaults to on for a drive; one such reference is enough
        $info->{replicate} //= 0;
        $info->{replicate} = 1 if ($drive->{replicate} // 1);

        $info->{shared} //= 0;
        $info->{shared} = 1 if $drive->{shared};

        # references without a snapshot name come from the current config
        $info->{referenced_in_config} //= 0;
        if (defined($snapname)) {
            $info->{referenced_in_snapshot}->{$snapname} = 1;
        } else {
            $info->{referenced_in_config} = 1;
        }

        # the first known size wins
        my $size = $drive->{size};
        $info->{size} //= $size if $size;

        $info->{is_vmstate} //= 0;
        $info->{is_vmstate} = 1 if $key eq 'vmstate';

        $info->{is_tpmstate} //= 0;
        $info->{is_tpmstate} = 1 if $key eq 'tpmstate0';

        $info->{is_unused} //= 0;
        $info->{is_unused} = 1 if $key =~ /^unused\d+$/;

        $info->{drivename} = $key if is_valid_drivename($key);
    };

    my $include_opts = {
        extra_keys => ['vmstate'],
        include_unused => 1,
    };

    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $record_volume);
    foreach my $snapname (keys %{$conf->{snapshots}}) {
        my $snapshot = $conf->{snapshots}->{$snapname};
        PVE::QemuConfig->foreach_volume_full($snapshot, $include_opts, $record_volume, $snapname);
    }

    foreach my $volid (keys %$volhash) {
        $func->($volid, $volhash->{$volid}, @param);
    }
}
4652
# Options that vmconfig_hotplug_pending() copies from the pending section
# straight into the configuration, without any hotplug action.
my $fast_plug_option = {
    map { $_ => 1 } qw(
        lock
        name
        onboot
        shares
        startup
        description
        protection
        vmstatestorage
        hookscript
        tags
    )
};
4665
# hotplug changes in [PENDING]
# $selection hash can be used to only apply specified options, for
# example: { cores => 1 } (only apply changed 'cores')
# $errors ref is used to return error messages
#
# Pending deletions are handled first, then pending additions/changes.
# Options that cannot be applied to the running VM are skipped and stay
# pending; real failures are recorded in $errors, keyed by option name.
# The resulting configuration is written out at the end.
sub vmconfig_hotplug_pending {
    my ($vmid, $conf, $storecfg, $selection, $errors) = @_;

    my $defaults = load_defaults();
    my $arch = get_vm_arch($conf);
    my $machine_type = get_vm_machine($conf, undef, $arch);

    # commit values which do not have any impact on running VM first
    # Note: those options cannot raise errors, so we do not care about
    # $selection and always apply them.

    my $add_error = sub {
        my ($opt, $msg) = @_;
        $errors->{$opt} = "hotplug problem - $msg";
    };

    my $changes = 0;
    foreach my $opt (keys %{$conf->{pending}}) { # add/change
        if ($fast_plug_option->{$opt}) {
            $conf->{$opt} = $conf->{pending}->{$opt};
            delete $conf->{pending}->{$opt};
            $changes = 1;
        }
    }

    if ($changes) {
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');

    # an unset 'hotplug' option is treated as not containing 'memory'; the
    # fallback only avoids matching against an undefined value
    my $current_hotplug = $conf->{hotplug} // '';

    my $cgroup = PVE::QemuServer::CGroup->new($vmid);
    my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    foreach my $opt (sort keys %$pending_delete_hash) {
        next if $selection && !$selection->{$opt};
        my $force = $pending_delete_hash->{$opt}->{force};
        # "skip\n" is used as a marker for 'leave this option pending'
        eval {
            if ($opt eq 'hotplug') {
                die "skip\n" if ($current_hotplug =~ /memory/);
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                if ($defaults->{tablet}) {
                    # vm_deviceplug() takes a device description before $arch;
                    # tablet and keyboard have none, so pass an empty one
                    vm_deviceplug($storecfg, $conf, $vmid, 'tablet', {}, $arch, $machine_type);
                    vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', {}, $arch, $machine_type)
                        if $arch eq 'aarch64';
                } else {
                    vm_deviceunplug($vmid, $conf, 'tablet');
                    vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
                }
            } elsif ($opt =~ m/^usb\d+/) {
                die "skip\n";
                # since we cannot reliably hot unplug usb devices we are disabling it
                #die "skip\n" if !$hotplug_features->{usb} || $conf->{$opt} =~ m/spice/i;
                #vm_deviceunplug($vmid, $conf, $opt);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, undef);
            } elsif ($opt eq 'balloon') {
                # enable balloon device is not hotpluggable
                die "skip\n" if defined($conf->{balloon}) && $conf->{balloon} == 0;
                # here we reset the ballooning value to memory
                my $balloon = $conf->{memory} || $defaults->{memory};
                mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
            } elsif ($fast_plug_option->{$opt}) {
                # do nothing
            } elsif ($opt =~ m/^net(\d+)$/) {
                die "skip\n" if !$hotplug_features->{network};
                vm_deviceunplug($vmid, $conf, $opt);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if !$hotplug_features->{disk} || $opt =~ m/(ide|sata)(\d+)/;
                vm_deviceunplug($vmid, $conf, $opt);
                vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
            } elsif ($opt =~ m/^memory$/) {
                die "skip\n" if !$hotplug_features->{memory};
                PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt);
            } elsif ($opt eq 'cpuunits') {
                $cgroup->change_cpu_shares(undef, 1024);
            } elsif ($opt eq 'cpulimit') {
                $cgroup->change_cpu_quota(-1, 100000);
            } else {
                die "skip\n";
            }
        };
        if (my $err = $@) {
            &$add_error($opt, $err) if $err ne "skip\n";
        } else {
            delete $conf->{$opt};
            PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        }
    }

    # Applies all pending cloudinit options at once and regenerates the
    # cloudinit config with the given drive ($key => $value) set.
    my ($apply_pending_cloudinit, $apply_pending_cloudinit_done);
    $apply_pending_cloudinit = sub {
        return if $apply_pending_cloudinit_done; # once is enough
        $apply_pending_cloudinit_done = 1; # once is enough

        my ($key, $value) = @_;

        my @cloudinit_opts = keys %$confdesc_cloudinit;
        foreach my $opt (keys %{$conf->{pending}}) {
            next if !grep { $_ eq $opt } @cloudinit_opts;
            $conf->{$opt} = delete $conf->{pending}->{$opt};
        }

        my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
        foreach my $opt (sort keys %$pending_delete_hash) {
            next if !grep { $_ eq $opt } @cloudinit_opts;
            PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
            delete $conf->{$opt};
        }

        my $new_conf = { %$conf };
        $new_conf->{$key} = $value;
        PVE::QemuServer::Cloudinit::generate_cloudinitconfig($new_conf, $vmid);
    };

    foreach my $opt (keys %{$conf->{pending}}) {
        next if $selection && !$selection->{$opt};
        my $value = $conf->{pending}->{$opt};
        eval {
            if ($opt eq 'hotplug') {
                die "skip\n" if ($value =~ /memory/) || ($value !~ /memory/ && $current_hotplug =~ /memory/);
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                if ($value == 1) {
                    # see above: an empty device description keeps $arch and
                    # $machine_type in their parameter slots
                    vm_deviceplug($storecfg, $conf, $vmid, 'tablet', {}, $arch, $machine_type);
                    vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', {}, $arch, $machine_type)
                        if $arch eq 'aarch64';
                } elsif ($value == 0) {
                    vm_deviceunplug($vmid, $conf, 'tablet');
                    vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
                }
            } elsif ($opt =~ m/^usb\d+$/) {
                die "skip\n";
                # since we cannot reliably hot unplug usb devices we disable it for now
                #die "skip\n" if !$hotplug_features->{usb} || $value =~ m/spice/i;
                #my $d = eval { parse_property_string($usbdesc->{format}, $value) };
                #die "skip\n" if !$d;
                #qemu_usb_hotplug($storecfg, $conf, $vmid, $opt, $d, $arch, $machine_type);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, $value);
            } elsif ($opt eq 'balloon') {
                # enable/disable ballooning device is not hotpluggable
                my $old_balloon_enabled = !!(!defined($conf->{balloon}) || $conf->{balloon});
                my $new_balloon_enabled = !!(!defined($conf->{pending}->{balloon}) || $conf->{pending}->{balloon});
                die "skip\n" if $old_balloon_enabled != $new_balloon_enabled;

                # allow manual ballooning if shares is set to zero
                if ((defined($conf->{shares}) && ($conf->{shares} == 0))) {
                    my $balloon = $conf->{pending}->{balloon} || $conf->{memory} || $defaults->{memory};
                    mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
                }
            } elsif ($opt =~ m/^net(\d+)$/) {
                # some changes can be done without hotplug
                vmconfig_update_net($storecfg, $conf, $hotplug_features->{network},
                    $vmid, $opt, $value, $arch, $machine_type);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if $opt eq 'efidisk0' || $opt eq 'tpmstate0';
                # some changes can be done without hotplug
                my $drive = parse_drive($opt, $value);
                if (drive_is_cloudinit($drive)) {
                    &$apply_pending_cloudinit($opt, $value);
                }
                vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
                    $vmid, $opt, $value, $arch, $machine_type);
            } elsif ($opt =~ m/^memory$/) { #dimms
                die "skip\n" if !$hotplug_features->{memory};
                $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt, $value);
            } elsif ($opt eq 'cpuunits') {
                $cgroup->change_cpu_shares($conf->{pending}->{$opt}, 1024);
            } elsif ($opt eq 'cpulimit') {
                my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000);
                $cgroup->change_cpu_quota($cpulimit, 100000);
            } else {
                die "skip\n"; # skip non-hot-pluggable options
            }
        };
        if (my $err = $@) {
            &$add_error($opt, $err) if $err ne "skip\n";
        } else {
            $conf->{$opt} = $value;
            delete $conf->{pending}->{$opt};
        }
    }

    PVE::QemuConfig->write_config($vmid, $conf);
}
4858
# Free the volume behind $drive if that is requested ($force) or if the
# drive is an 'unused' entry, and the drive does not count as a cdrom.
#
# Returns 1 if the drive may be dropped from the configuration (the volume
# was freed or is not owned by this VM) and an empty value if nothing was
# done. Dies if the volume is still in use or the permission check fails.
sub try_deallocate_drive {
    my ($storecfg, $vmid, $conf, $key, $drive, $rpcenv, $authuser, $force) = @_;

    return if !($force || $key =~ /^unused/);
    return if drive_is_cdrom($drive, 1);

    my $volid = $drive->{file};

    # If vm is not owner of this disk remove from config
    return 1 if !vm_is_volid_owner($storecfg, $vmid, $volid);

    my $storeid = PVE::Storage::parse_volume_id($volid);
    $rpcenv->check($authuser, "/storage/$storeid", ['Datastore.AllocateSpace']);

    # check if the disk is really unused
    die "unable to delete '$volid' - volume is still in use (snapshot?)\n"
        if PVE::QemuServer::Drive::is_volume_in_use($storecfg, $conf, $key, $volid);

    PVE::Storage::vdisk_free($storecfg, $volid);

    return 1;
}
4881
# Handle the removal of the drive option $opt: with $force the volume is
# deallocated (after a 'VM.Config.Disk' permission check), otherwise it is
# registered as an unused drive.
sub vmconfig_delete_or_detach_drive {
    my ($vmid, $storecfg, $conf, $opt, $force) = @_;

    my $drive = parse_drive($opt, $conf->{$opt});

    my $rpcenv = PVE::RPCEnvironment::get();
    my $authuser = $rpcenv->get_user();

    if (!$force) {
        vmconfig_register_unused_drive($storecfg, $vmid, $conf, $drive);
    } else {
        $rpcenv->check_vm_perm($authuser, $vmid, undef, ['VM.Config.Disk']);
        try_deallocate_drive($storecfg, $vmid, $conf, $opt, $drive, $rpcenv, $authuser, $force);
    }
}
4897
4898
4899
# Apply all pending changes of $conf without hotplug (cold plug): first the
# pending deletions, then the pending additions/changes.
#
# Failures are recorded in $errors (keyed by option name) and warned about;
# the affected option then stays pending. The configuration is written once
# at the end.
sub vmconfig_apply_pending {
    my ($vmid, $conf, $storecfg, $errors) = @_;

    my $report_error = sub {
        my ($opt, $msg) = @_;
        my $err_msg = "unable to apply pending change $opt : $msg";
        $errors->{$opt} = $err_msg;
        warn $err_msg;
    };

    # cold plug

    my $to_delete = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    foreach my $opt (sort keys %$to_delete) {
        my $force = $to_delete->{$opt}->{force};
        eval {
            die "internal error" if $opt =~ m/^unused/;

            # drives that are still configured get deleted or detached
            vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force)
                if defined($conf->{$opt}) && is_valid_drivename($opt);
        };
        if (my $err = $@) {
            $report_error->($opt, $err);
        } else {
            PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
            delete $conf->{$opt};
        }
    }

    PVE::QemuConfig->cleanup_pending($conf);

    foreach my $opt (keys %{$conf->{pending}}) { # add/change
        next if $opt eq 'delete'; # just to be sure
        eval {
            # a drive that gets replaced is kept as an unused drive
            if (defined($conf->{$opt}) && is_valid_drivename($opt)) {
                my $old_drive = parse_drive($opt, $conf->{$opt});
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
            }
        };
        if (my $err = $@) {
            $report_error->($opt, $err);
        } else {
            $conf->{$opt} = delete $conf->{pending}->{$opt};
        }
    }

    # write all changes at once to avoid unnecessary i/o
    PVE::QemuConfig->write_config($vmid, $conf);
}
4949
# Apply the new value $value of the network option $opt to the running VM.
#
# Changes of bridge, tag, trunks, firewall, rate and link state are applied
# online to the existing device (returns 1). A change of model, MAC address
# or queues, or a setup where old and new do not both have a bridge, needs
# the device to be unplugged and plugged again; without $hotplug this dies
# with "skip\n".
sub vmconfig_update_net {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $newnet = parse_net($value);

    if ($conf->{$opt}) {
        my $oldnet = parse_net($conf->{$opt});

        my $needs_replug =
            safe_string_ne($oldnet->{model}, $newnet->{model})
            || safe_string_ne($oldnet->{macaddr}, $newnet->{macaddr})
            || safe_num_ne($oldnet->{queues}, $newnet->{queues})
            || !($newnet->{bridge} && $oldnet->{bridge}); # bridge/nat mode change

        if (!$needs_replug) {
            die "internal error" if $opt !~ m/net(\d+)/;
            my $iface = "tap${vmid}i$1";

            my $needs_tap_replug =
                safe_string_ne($oldnet->{bridge}, $newnet->{bridge})
                || safe_num_ne($oldnet->{tag}, $newnet->{tag})
                || safe_string_ne($oldnet->{trunks}, $newnet->{trunks})
                || safe_num_ne($oldnet->{firewall}, $newnet->{firewall});

            if ($needs_tap_replug) {
                PVE::Network::tap_unplug($iface);

                my @plug_args = (
                    $iface,
                    $newnet->{bridge},
                    $newnet->{tag},
                    $newnet->{firewall},
                    $newnet->{trunks},
                    $newnet->{rate},
                );
                if ($have_sdn) {
                    PVE::Network::SDN::Zones::tap_plug(@plug_args);
                } else {
                    PVE::Network::tap_plug(@plug_args);
                }
            } elsif (safe_num_ne($oldnet->{rate}, $newnet->{rate})) {
                # Rate can be applied on its own but any change above needs to
                # include the rate in tap_plug since OVS resets everything.
                PVE::Network::tap_rate_limit($iface, $newnet->{rate});
            }

            if (safe_string_ne($oldnet->{link_down}, $newnet->{link_down})) {
                qemu_set_link_status($vmid, $opt, !$newnet->{link_down});
            }

            return 1;
        }

        # for non online change, we try to hot-unplug
        die "skip\n" if !$hotplug;
        vm_deviceunplug($vmid, $conf, $opt);
    }

    die "skip\n" if !$hotplug;

    vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet, $arch, $machine_type);
}
5002
# Apply a changed drive option to a running VM.
#
# For an existing CD-ROM drive the medium is ejected and, if a new one is
# configured, inserted. For an existing disk that keeps its volume only the
# I/O throttle settings are updated live; changes to discard, iothread,
# queues, cache or ssd cannot be applied and die with "skip\n". A disk whose
# volume changes is unplugged, registered as unused and the new one plugged.
#
# Returns 1 when the change was applied in place. Dies with "skip\n" when the
# change needs hotplug but $hotplug is not set, or the bus is ide/sata.
sub vmconfig_update_disk {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $drive = parse_drive($opt, $value);

    if ($conf->{$opt} && (my $old_drive = parse_drive($opt, $conf->{$opt}))) {
        my $new_media = $drive->{media} || 'disk';
        my $old_media = $old_drive->{media} || 'disk';
        die "unable to change media type\n" if $new_media ne $old_media;

        if (drive_is_cdrom($old_drive)) {
            if ($drive->{file} eq 'none') {
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive)
                    if drive_is_cloudinit($old_drive);
            } else {
                my $path = get_iso_path($storecfg, $vmid, $drive->{file});

                # force eject if locked
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");

                mon_cmd($vmid, "blockdev-change-medium", id => "$opt", filename => "$path")
                    if $path;
            }

            return 1;
        }

        # true as soon as one of the listed properties differs between the
        # new and the old drive, using the given comparison function
        my $any_changed = sub {
            my ($is_different, @properties) = @_;
            for my $property (@properties) {
                return 1 if $is_different->($drive->{$property}, $old_drive->{$property});
            }
            return 0;
        };

        if ($drive->{file} eq $old_drive->{file}) {
            # update existing disk

            # skip non hotpluggable value
            die "skip\n"
                if $any_changed->(\&safe_string_ne, qw(discard iothread queues cache ssd));

            # throttle settings, in the argument order of qemu_block_set_io_throttle
            my @rate_mb = qw(mbps mbps_rd mbps_wr);
            my @rate_io = qw(iops iops_rd iops_wr);
            my @burst_mb = qw(mbps_max mbps_rd_max mbps_wr_max);
            my @burst_io = qw(iops_max iops_rd_max iops_wr_max);
            my @burst_length = qw(
                bps_max_length bps_rd_max_length bps_wr_max_length
                iops_max_length iops_rd_max_length iops_wr_max_length
            );

            my $throttle_changed = $any_changed->(
                \&safe_num_ne, @rate_mb, @rate_io, @burst_mb, @burst_io, @burst_length);

            # apply throttle
            if ($throttle_changed) {
                my $to_bytes = sub { return ($drive->{$_[0]} || 0)*1024*1024 };

                qemu_block_set_io_throttle(
                    $vmid,
                    "drive-$opt",
                    (map { $to_bytes->($_) } @rate_mb),
                    (map { $drive->{$_} || 0 } @rate_io),
                    (map { $to_bytes->($_) } @burst_mb),
                    (map { $drive->{$_} || 0 } @burst_io),
                    (map { $drive->{$_} || 1 } @burst_length),
                );
            }

            return 1;
        }

        # the volume itself changes, which needs a hot-unplug of the old one
        die "skip\n" if !$hotplug;

        # unplug and register as unused
        vm_deviceunplug($vmid, $conf, $opt);
        vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
    }

    die "skip\n" if !$hotplug || $opt =~ m/(ide|sata)(\d+)/;

    # hotplug new disks
    PVE::Storage::activate_volumes($storecfg, [$drive->{file}])
        if $drive->{file} !~ m|^/dev/.+|;

    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive, $arch, $machine_type);
}
5110
# Collect the volumes that have to be exported via NBD for an incoming
# migration. Called in locked context by incoming migration.
#
# CD-ROM drives, drives without a volume and volumes on shared storages are
# left out. Returns a hash reference mapping the drive key to
# [ $volid, $storeid, $volname, $drive, $use_existing ], where $use_existing
# is 1 for volumes listed in $replicated_volumes and 0 otherwise.
sub vm_migrate_get_nbd_disks {
    my ($storecfg, $conf, $replicated_volumes) = @_;

    my $local_volumes = {};

    my $collect_local = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);

        my $volid = $drive->{file} or return;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid);

        return if PVE::Storage::storage_config($storecfg, $storeid)->{shared};

        # replicated disks re-use existing state via bitmap
        my $use_existing = $replicated_volumes->{$volid} ? 1 : 0;

        $local_volumes->{$ds} = [$volid, $storeid, $volname, $drive, $use_existing];
    };

    PVE::QemuConfig->foreach_volume($conf, $collect_local);

    return $local_volumes;
}
5136
# Allocate the target volumes for an NBD storage migration. Called in locked
# context by incoming migration.
#
# $source_volumes has the layout returned by vm_migrate_get_nbd_disks.
# Volumes flagged as re-usable are taken over unchanged and marked as
# replicated; for all others a new volume is allocated on the (possibly
# mapped) storage. Note that the drive hash of a newly allocated volume is
# updated in place with the new format and volume ID.
#
# Returns a hash reference mapping the drive key to a hash with 'drivestr',
# 'volid' and, for re-used volumes, 'replicated'.
sub vm_migrate_alloc_nbd_disks {
    my ($storecfg, $vmid, $source_volumes, $storagemap) = @_;

    my $nbd = {};

    for my $opt (sort keys %$source_volumes) {
        my ($volid, $storeid, $volname, $drive, $use_existing) = $source_volumes->{$opt}->@*;

        if ($use_existing) {
            $nbd->{$opt} = {
                drivestr => print_drive($drive),
                volid => $volid,
                replicated => 1,
            };
            next;
        }

        my $format;
        if ($storagemap->{identity}) {
            # same storage on the target: keep the format of the original
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $format = qemu_img_format($scfg, $volname);
        } else {
            # If a remote storage is specified and the format of the original
            # volume is not available there, fall back to the default format.
            # Otherwise use the same format as the original.
            $storeid = map_storage($storagemap, $storeid);
            my ($default_format, $valid_formats) =
                PVE::Storage::storage_default_format($storecfg, $storeid);
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            my $source_format = qemu_img_format($scfg, $volname);
            my $supported = grep { $source_format eq $_ } @$valid_formats;
            $format = $supported ? $source_format : $default_format;
        }

        my $size = $drive->{size} / 1024;
        my $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $format, undef, $size);

        # the drive description is re-used (and modified) for the new volume
        $drive->{format} = $format;
        $drive->{file} = $newvolid;

        $nbd->{$opt} = {
            drivestr => print_drive($drive),
            volid => $newvolid,
        };
    }

    return $nbd;
}
5180
# Start a VM while holding its config lock.
#
# See vm_start_nolock for the parameters, additionally:
# migrate_opts:
#   storagemap = parsed storage map for allocating NBD disks
#
# If the VM holds a 'backup' lock and is already running, the guest is
# resumed inside the existing QEMU instance instead of being started anew.
# Sets $params->{resume} when the config has a 'suspended' lock or a vmstate.
sub vm_start {
    my ($storecfg, $vmid, $params, $migrate_opts) = @_;

    my $locked_start = sub {
        my $migratedfrom = $migrate_opts->{migratedfrom};
        my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);

        if (!$params->{skiptemplate} && PVE::QemuConfig->is_template($conf)) {
            die "you can't start a vm if it's a template\n";
        }

        my $is_suspended = PVE::QemuConfig->has_lock($conf, 'suspended');
        my $is_backing_up = PVE::QemuConfig->has_lock($conf, 'backup');

        my $running = check_running($vmid, undef, $migratedfrom);

        # a backup is currently running, attempt to start the guest in the
        # existing QEMU instance
        return vm_resume($vmid) if $is_backing_up && $running;

        PVE::QemuConfig->check_lock($conf) unless $params->{skiplock} || $is_suspended;

        $params->{resume} = $is_suspended || defined($conf->{vmstate});

        die "VM $vmid already running\n" if $running;

        if (my $storagemap = $migrate_opts->{storagemap}) {
            my $disks = vm_migrate_get_nbd_disks(
                $storecfg, $conf, $migrate_opts->{replicated_volumes});
            my $nbd = vm_migrate_alloc_nbd_disks($storecfg, $vmid, $disks, $storagemap);
            $migrate_opts->{nbd} = $nbd;

            # start with the freshly allocated target volumes
            $conf->{$_} = $nbd->{$_}->{drivestr} for keys %$nbd;
        }

        return vm_start_nolock($storecfg, $vmid, $conf, $params, $migrate_opts);
    };

    return PVE::QemuConfig->lock_config($vmid, $locked_start);
}
5224
5225
# Start the QEMU process for a VM; the caller must hold the config lock.
#
# params:
#   statefile => 'tcp', 'unix' for migration or path/volid for RAM state
#   skiplock => 0/1, skip checking for config lock
#   skiptemplate => 0/1, skip checking whether VM is template
#   forcemachine => to force Qemu machine (rollback/migration)
#   forcecpu => a QEMU '-cpu' argument string to override get_cpu_options
#   timeout => in seconds
#   paused => start VM in paused state (backup)
#   resume => resume from hibernation
#   pbs-backing => {
#       sata0 => {
#           repository
#           snapshot
#           keyfile
#           archive
#       },
#       virtio2 => ...
#   }
# migrate_opts:
#   nbd => volumes for NBD exports (vm_migrate_alloc_nbd_disks)
#   migratedfrom => source node
#   spice_ticket => used for spice migration, passed via tunnel/stdin
#   network => CIDR of migration network
#   type => secure/insecure - tunnel over encrypted connection or plain-text
#   nbd_proto_version => int, 0 for TCP, 1 for UNIX
#   replicated_volumes = which volids should be re-used with bitmaps for nbd migration
#
# Returns a hash reference that may contain 'migrate_uri',
# 'migrate_storage_uri', 'drives' (per-drive NBD export info) and
# 'spice_port'. Dies with "start failed: ..." if QEMU could not be started.
sub vm_start_nolock {
    my ($storecfg, $vmid, $conf, $params, $migrate_opts) = @_;

    my $statefile = $params->{statefile};
    my $resume = $params->{resume};

    my $migratedfrom = $migrate_opts->{migratedfrom};
    my $migration_type = $migrate_opts->{type};

    my $res = {};

    # clean up leftover reboot request files
    eval { clear_reboot_request($vmid); };
    warn $@ if $@;

    if (!$statefile && scalar(keys %{$conf->{pending}})) {
        vmconfig_apply_pending($vmid, $conf, $storecfg);
        $conf = PVE::QemuConfig->load_config($vmid); # update/reload
    }

    # don't regenerate the ISO if the VM is started as part of a live migration
    # this way we can reuse the old ISO with the correct config
    PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid) if !$migratedfrom;

    my $defaults = load_defaults();

    # set environment variable useful inside network script
    $ENV{PVE_MIGRATED_FROM} = $migratedfrom if $migratedfrom;

    PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1);

    my $forcemachine = $params->{forcemachine};
    my $forcecpu = $params->{forcecpu};
    if ($resume) {
        # enforce machine and CPU type on suspended vm to ensure HW compatibility
        $forcemachine = $conf->{runningmachine};
        $forcecpu = $conf->{runningcpu};
        print "Resuming suspended VM\n";
    }

    my ($cmd, $vollist, $spice_port) = config_to_command(
        $storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu, $params->{'pbs-backing'});

    # Lazily determine (and cache) the local IP to listen on for migration
    # traffic: the single address inside the migration network if one is
    # configured, the cluster IP of the given node otherwise.
    my $migration_ip;
    my $get_migration_ip = sub {
        my ($nodename) = @_;

        return $migration_ip if defined($migration_ip);

        my $cidr = $migrate_opts->{network};

        if (!defined($cidr)) {
            my $dc_conf = PVE::Cluster::cfs_read_file('datacenter.cfg');
            $cidr = $dc_conf->{migration}->{network};
        }

        if (defined($cidr)) {
            my $ips = PVE::Network::get_local_ip_from_cidr($cidr);

            die "could not get IP: no address configured on local " .
                "node for network '$cidr'\n" if scalar(@$ips) == 0;

            die "could not get IP: multiple addresses configured on local " .
                "node for network '$cidr'\n" if scalar(@$ips) > 1;

            $migration_ip = $ips->[0];
        }

        $migration_ip = PVE::Cluster::remote_node_ip($nodename, 1)
            if !defined($migration_ip);

        return $migration_ip;
    };

    my $migrate_uri;
    if ($statefile) {
        if ($statefile eq 'tcp') {
            my $localip = "localhost";
            my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg');
            my $nodename = nodename();

            # fall back to the datacenter setting, then to 'secure'
            if (!defined($migration_type)) {
                $migration_type = $datacenterconf->{migration}->{type} // 'secure';
            }

            if ($migration_type eq 'insecure') {
                $localip = $get_migration_ip->($nodename);
                $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
            }

            my $pfamily = PVE::Tools::get_host_address_family($nodename);
            my $migrate_port = PVE::Tools::next_migrate_port($pfamily);
            $migrate_uri = "tcp:${localip}:${migrate_port}";
            push @$cmd, '-incoming', $migrate_uri;
            push @$cmd, '-S';

        } elsif ($statefile eq 'unix') {
            # should be default for secure migrations as a ssh TCP forward
            # tunnel is not deterministic reliable ready and fails regularly
            # to set up in time, so use UNIX socket forwards
            my $socket_addr = "/run/qemu-server/$vmid.migrate";
            unlink $socket_addr;

            $migrate_uri = "unix:$socket_addr";

            push @$cmd, '-incoming', $migrate_uri;
            push @$cmd, '-S';

        } elsif (-e $statefile) {
            push @$cmd, '-loadstate', $statefile;
        } else {
            my $statepath = PVE::Storage::path($storecfg, $statefile);
            push @$vollist, $statefile;
            push @$cmd, '-loadstate', $statepath;
        }
    } elsif ($params->{paused}) {
        push @$cmd, '-S';
    }

    my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume);

    my $pci_devices = {}; # host pci devices
    for my $i (0 .. $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES - 1) {
        my $dev = $conf->{"hostpci$i"} or next;
        $pci_devices->{$i} = parse_hostpci($dev);
    }

    my $pci_id_list = [ map { $_->{id} } map { $_->{pciid}->@* } values $pci_devices->%* ];
    # reserve all PCI IDs before actually doing anything with them
    PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, $start_timeout);

    eval {
        for my $id (sort keys %$pci_devices) {
            my $d = $pci_devices->{$id};
            for my $dev ($d->{pciid}->@*) {
                PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $id, $d->{mdev});
            }
        }
    };
    if (my $err = $@) {
        # give the reservation back, the devices will not be used
        eval { PVE::QemuServer::PCI::remove_pci_reservation($pci_id_list) };
        warn $@ if $@;
        die $err;
    }

    PVE::Storage::activate_volumes($storecfg, $vollist);

    eval {
        run_command(['/bin/systemctl', 'stop', "$vmid.scope"], outfunc => sub{}, errfunc => sub{});
    };
    # Issues with the above 'stop' not being fully completed are extremely rare, a very low
    # timeout should be more than enough here...
    PVE::Systemd::wait_for_unit_removed("$vmid.scope", 5);

    my $cpuunits = get_cpuunits($conf);

    my %run_params = (
        timeout => $statefile ? undef : $start_timeout,
        umask => 0077,
        noerr => 1,
    );

    # when migrating, prefix QEMU output so other side can pick up any
    # errors that might occur and show the user
    if ($migratedfrom) {
        $run_params{quiet} = 1;
        $run_params{logfunc} = sub { print "QEMU: $_[0]\n" };
    }

    my %systemd_properties = (
        Slice => 'qemu.slice',
        KillMode => 'process',
        SendSIGKILL => 0,
        TimeoutStopUSec => ULONG_MAX, # infinity
    );

    if (PVE::CGroup::cgroup_mode() == 2) {
        $cpuunits = 10000 if $cpuunits >= 10000; # else we get an error
        $systemd_properties{CPUWeight} = $cpuunits;
    } else {
        $systemd_properties{CPUShares} = $cpuunits;
    }

    if (my $cpulimit = $conf->{cpulimit}) {
        $systemd_properties{CPUQuota} = int($cpulimit * 100);
    }
    $systemd_properties{timeout} = 10 if $statefile; # setting up the scope should be quick

    my $run_qemu = sub {
        PVE::Tools::run_fork sub {
            PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);

            my $tpmpid;
            if (my $tpm = $conf->{tpmstate0}) {
                # start the TPM emulator so QEMU can connect on start
                $tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom);
            }

            my $exitcode = run_command($cmd, %run_params);
            if ($exitcode) {
                # only mention (and stop) swtpm if an instance was started
                if ($tpmpid) {
                    warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n";
                    kill 'TERM', $tpmpid;
                }
                die "QEMU exited with code $exitcode\n";
            }
        };
    };

    if ($conf->{hugepages}) {

        my $code = sub {
            my $hugepages_topology = PVE::QemuServer::Memory::hugepages_topology($conf);
            my $hugepages_host_topology = PVE::QemuServer::Memory::hugepages_host_topology();

            PVE::QemuServer::Memory::hugepages_mount();
            PVE::QemuServer::Memory::hugepages_allocate($hugepages_topology, $hugepages_host_topology);

            eval { $run_qemu->() };
            if (my $err = $@) {
                PVE::QemuServer::Memory::hugepages_reset($hugepages_host_topology)
                    if !$conf->{keephugepages};
                die $err;
            }

            PVE::QemuServer::Memory::hugepages_pre_deallocate($hugepages_topology)
                if !$conf->{keephugepages};
        };
        eval { PVE::QemuServer::Memory::hugepages_update_locked($code); };

    } else {
        eval { $run_qemu->() };
    }

    if (my $err = $@) {
        # deactivate volumes if start fails
        eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
        eval { PVE::QemuServer::PCI::remove_pci_reservation($pci_id_list) };

        die "start failed: $err";
    }

    # re-reserve all PCI IDs now that we can know the actual VM PID
    my $pid = PVE::QemuServer::Helpers::vm_running_locally($vmid);
    eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, undef, $pid) };
    warn $@ if $@;

    print "migration listens on $migrate_uri\n" if $migrate_uri;
    $res->{migrate_uri} = $migrate_uri;

    # a state loaded from a file/volume is not started automatically
    if ($statefile && $statefile ne 'tcp' && $statefile ne 'unix') {
        eval { mon_cmd($vmid, "cont"); };
        warn $@ if $@;
    }

    #start nbd server for storage migration
    if (my $nbd = $migrate_opts->{nbd}) {
        my $nbd_protocol_version = $migrate_opts->{nbd_proto_version} // 0;

        my $migrate_storage_uri;
        # nbd_protocol_version > 0 for unix socket support
        # ($migration_type is only defaulted for 'tcp' state files and may be undef here)
        if ($nbd_protocol_version > 0 && ($migration_type // '') eq 'secure') {
            my $socket_path = "/run/qemu-server/$vmid\_nbd.migrate";
            mon_cmd($vmid, "nbd-server-start", addr => { type => 'unix', data => { path => $socket_path } } );
            $migrate_storage_uri = "nbd:unix:$socket_path";
        } else {
            my $nodename = nodename();
            my $localip = $get_migration_ip->($nodename);
            my $pfamily = PVE::Tools::get_host_address_family($nodename);
            my $storage_migrate_port = PVE::Tools::next_migrate_port($pfamily);

            mon_cmd($vmid, "nbd-server-start", addr => {
                type => 'inet',
                data => {
                    host => "${localip}",
                    port => "${storage_migrate_port}",
                },
            });
            $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
            $migrate_storage_uri = "nbd:${localip}:${storage_migrate_port}";
        }

        $res->{migrate_storage_uri} = $migrate_storage_uri;

        foreach my $opt (sort keys %$nbd) {
            my $drivestr = $nbd->{$opt}->{drivestr};
            my $volid = $nbd->{$opt}->{volid};
            mon_cmd($vmid, "nbd-server-add", device => "drive-$opt", writable => JSON::true );
            my $nbd_uri = "$migrate_storage_uri:exportname=drive-$opt";
            print "storage migration listens on $nbd_uri volume:$drivestr\n";
            print "re-using replicated volume: $opt - $volid\n"
                if $nbd->{$opt}->{replicated};

            $res->{drives}->{$opt} = $nbd->{$opt};
            $res->{drives}->{$opt}->{nbd_uri} = $nbd_uri;
        }
    }

    if ($migratedfrom) {
        eval {
            set_migration_caps($vmid);
        };
        warn $@ if $@;

        if ($spice_port) {
            print "spice listens on port $spice_port\n";
            $res->{spice_port} = $spice_port;
            if ($migrate_opts->{spice_ticket}) {
                mon_cmd($vmid, "set_password", protocol => 'spice', password =>
                    $migrate_opts->{spice_ticket});
                mon_cmd($vmid, "expire_password", protocol => 'spice', time => "+30");
            }
        }

    } else {
        mon_cmd($vmid, "balloon", value => $conf->{balloon}*1024*1024)
            if !$statefile && $conf->{balloon};

        foreach my $opt (keys %$conf) {
            next if $opt !~ m/^net\d+$/;
            my $nicconf = parse_net($conf->{$opt});
            qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down};
        }
    }

    # poll guest memory statistics unless ballooning is explicitly disabled
    mon_cmd($vmid, 'qom-set',
        path => "machine/peripheral/balloon0",
        property => "guest-stats-polling-interval",
        value => 2) if (!defined($conf->{balloon}) || $conf->{balloon});

    if ($resume) {
        print "Resumed VM, removing state\n";
        if (my $vmstate = $conf->{vmstate}) {
            PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
            PVE::Storage::vdisk_free($storecfg, $vmstate);
        }
        delete $conf->@{qw(lock vmstate runningmachine runningcpu)};
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-start');

    return $res;
}
5598
# Build the QEMU command line of a VM as a single string.
#
# With $snapname the configuration of that snapshot is used instead of the
# current one, including its recorded running machine and CPU type. Dies if
# the snapshot does not exist.
sub vm_commandline {
    my ($storecfg, $vmid, $snapname) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    my ($forcemachine, $forcecpu);

    if ($snapname) {
        my $snapshot = $conf->{snapshots}->{$snapname}
            // die "snapshot '$snapname' does not exist\n";

        # check for machine or CPU overrides in snapshot
        ($forcemachine, $forcecpu) = $snapshot->@{qw(runningmachine runningcpu)};

        # keep file digest for API
        $snapshot->{digest} = $conf->{digest};

        $conf = $snapshot;
    }

    my $defaults = load_defaults();

    # assigned to a scalar on purpose, exactly like the code this replaces
    my $cmd = config_to_command(
        $storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu);

    return PVE::Tools::cmd2string($cmd);
}
5626
# Send a system reset to the VM while holding its config lock. The config
# lock state is checked first unless $skiplock is set.
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $reset_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid);

        PVE::QemuConfig->check_lock($conf) unless $skiplock;

        mon_cmd($vmid, "system_reset");
    };

    PVE::QemuConfig->lock_config($vmid, $reset_locked);
}
5639
# Return an array reference with the IDs of all storage-managed volumes
# referenced by the given config. Absolute paths and IDs that cannot be
# parsed as volume IDs are left out.
sub get_vm_volumes {
    my ($conf) = @_;

    my $vollist = [];

    my $collect = sub {
        my ($volid, $attr) = @_;

        return if $volid =~ m|^/|;

        # second argument: do not die on unparsable IDs
        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);

        push @$vollist, $volid if $sid;
        return;
    };

    foreach_volid($conf, $collect);

    return $vollist;
}
5657
# Clean up after a VM has stopped: deactivate its volumes (unless
# $keepActive), remove the runtime files, the ivshmem device file, the
# mediated devices and PCI reservations, and finally apply pending config
# changes if $apply_pending_changes is set.
#
# Errors are never propagated, they are only printed as warnings.
sub vm_stop_cleanup {
    my ($storecfg, $vmid, $conf, $keepActive, $apply_pending_changes) = @_;

    eval {
        unless ($keepActive) {
            PVE::Storage::deactivate_volumes($storecfg, get_vm_volumes($conf));

            if (my $tpmdrive = $conf->{tpmstate0}) {
                my $tpm = parse_drive("tpmstate0", $tpmdrive);
                my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
                PVE::Storage::unmap_volume($storecfg, $tpm->{file}) if $storeid;
            }
        }

        unlink "/var/run/qemu-server/${vmid}.$_" for qw(mon qmp pid vnc qga);

        if (my $ivshmem_conf = $conf->{ivshmem}) {
            my $ivshmem = parse_property_string($ivshmem_fmt, $ivshmem_conf);
            # Just delete it for now. VMs which already have it open are not
            # affected, but new VMs will get a separate one. If this becomes
            # an issue we either add some sort of ref-counting or just add a
            # "don't delete on stop" flag to the ivshmem format.
            my $shm_name = $ivshmem->{name} // $vmid;
            unlink '/dev/shm/pve-shm-' . $shm_name;
        }

        my $reserved_ids = [];
        for my $key (keys %$conf) {
            my ($hostpciindex) = $key =~ m/^hostpci(\d+)$/ or next;

            my $device = parse_hostpci($conf->{$key});
            my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $hostpciindex);

            for my $pci ($device->{pciid}->@*) {
                push @$reserved_ids, $pci->{id};
                PVE::SysFSTools::pci_cleanup_mdev_device($pci->{id}, $uuid);
            }
        }
        PVE::QemuServer::PCI::remove_pci_reservation($reserved_ids);

        vmconfig_apply_pending($vmid, $conf, $storecfg) if $apply_pending_changes;
    };
    warn $@ if $@; # avoid errors - just warn
}
5708
# Stop or shut down a running VM. Call only in locked context.
#
# With $shutdown the guest is asked to power down (through the guest agent
# if it is enabled in the config, via ACPI otherwise), else QEMU is told to
# quit. If the VM is still running after $timeout seconds (default: the
# 'down' value of the startup option on shutdown, else 60) it is sent
# SIGTERM when $force is set, and SIGKILL after ten further seconds;
# without $force the sub dies instead.
#
# With $nocheck the config is not loaded, so no lock check, hookscript or
# cleanup takes place. Returns early if the VM is not running.
sub _do_vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive) = @_;

    my $pid = check_running($vmid, $nocheck) or return;

    my $conf;
    if (!$nocheck) {
        $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->check_lock($conf) if !$skiplock;
        if (!defined($timeout) && $shutdown && $conf->{startup}) {
            my $startup = PVE::JSONSchema::pve_parse_startup_order($conf->{startup});
            $timeout = $startup->{down} if $startup->{down};
        }
        PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-stop');
    }

    # polls once per second until the VM is gone or $limit seconds have
    # passed; returns true if the limit was reached
    my $wait_timed_out = sub {
        my ($limit) = @_;

        my $elapsed = 0;
        while (($elapsed < $limit) && check_running($vmid, $nocheck)) {
            $elapsed++;
            sleep 1;
        }
        return $elapsed >= $limit;
    };

    eval {
        if (!$shutdown) {
            mon_cmd($vmid, "quit");
        } elsif (defined($conf) && get_qga_key($conf, 'enabled')) {
            mon_cmd($vmid, "guest-shutdown", timeout => $timeout);
        } else {
            mon_cmd($vmid, "system_powerdown");
        }
    };

    if (my $err = $@) {
        if (!check_running($vmid, $nocheck)) {
            warn "Unexpected: VM shutdown command failed, but VM not running anymore..\n";
            return;
        }
        die "VM quit/powerdown failed\n" if !$force;

        warn "VM quit/powerdown failed - terminating now with SIGTERM\n";
        kill 15, $pid;
    } else {
        $timeout //= 60;

        if (!$wait_timed_out->($timeout)) {
            # the VM went away in time, nothing needs to be terminated
            vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
            return;
        }
        die "VM quit/powerdown failed - got timeout\n" if !$force;

        warn "VM still running - terminating now with SIGTERM\n";
        kill 15, $pid;
    }

    # wait again
    $timeout = 10;

    if ($wait_timed_out->($timeout)) {
        warn "VM still running - terminating now with SIGKILL\n";
        kill 9, $pid;
        sleep 1;
    }

    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
}
5790
# Stop a VM, taking the config lock.
#
# Note: use $nocheck to skip tests if VM configuration file exists.
# We need that when migrating VMs to other nodes (files already moved).
# Note: we set $keepActive in vzdump stop mode - volumes need to stay active.
#
# $force defaults to 1 for a plain stop (no $shutdown). With $migratedfrom
# the process is sent SIGTERM and cleaned up using the config on the source
# node, without taking the lock and without applying pending changes.
sub vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive, $migratedfrom) = @_;

    $force //= 1 if !$shutdown;

    if ($migratedfrom) {
        if (my $pid = check_running($vmid, $nocheck, $migratedfrom)) {
            kill 15, $pid;
        }
        my $source_conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);
        vm_stop_cleanup($storecfg, $vmid, $source_conf, $keepActive, 0);
        return;
    }

    my $stop_locked = sub {
        _do_vm_stop($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive);
    };

    PVE::QemuConfig->lock_config($vmid, $stop_locked);
}
5811
# Reboot a running VM: record a reboot request and shut the guest down while
# holding the config lock. Nothing is done if the VM is not running. If the
# shutdown fails, the reboot request is removed again and the error re-thrown.
sub vm_reboot {
    my ($vmid, $timeout) = @_;

    my $reboot_locked = sub {
        eval {
            # only reboot if running, as qmeventd starts it again on a stop event
            if (check_running($vmid)) {
                create_reboot_request($vmid);

                my $storecfg = PVE::Storage::config();
                _do_vm_stop($storecfg, $vmid, undef, undef, $timeout, 1);
            }
        };
        if (my $err = $@) {
            # avoid that the next normal shutdown will be confused for a reboot
            clear_reboot_request($vmid);
            die $err;
        }
    };

    PVE::QemuConfig->lock_config($vmid, $reboot_locked);
}
5834
# Suspend a VM, either by pausing it or, with $includestate, by saving its
# state to a volume and quitting QEMU afterwards.
#
# Note: if using the $statestorage parameter, the caller has to check
# privileges. Without it the storage is looked up from the config and,
# outside of the CLI, the current user's permission on it is checked.
#
# Suspending to disk works in three steps: under the config lock the
# 'suspending' lock is set and the state volume prepared, then the state is
# saved without holding the lock, and finally, under the lock again, QEMU is
# quit and the lock changed to 'suspended'. If saving fails the state volume
# is removed, but the 'suspending' lock is kept.
sub vm_suspend {
    my ($vmid, $skiplock, $includestate, $statestorage) = @_;

    # shared between the locked sections below
    my ($conf, $path, $storecfg, $vmstate);

    my $prepare = sub {
        $conf = PVE::QemuConfig->load_config($vmid);

        my $is_backing_up = PVE::QemuConfig->has_lock($conf, 'backup');
        PVE::QemuConfig->check_lock($conf) unless $skiplock || $is_backing_up;

        die "cannot suspend to disk during backup\n"
            if $is_backing_up && $includestate;

        if (!$includestate) {
            mon_cmd($vmid, "stop");
            return;
        }

        $conf->{lock} = 'suspending';
        my $today = strftime("%Y-%m-%d", localtime(time()));
        $storecfg = PVE::Storage::config();

        if (!$statestorage) {
            $statestorage = find_vmstate_storage($conf, $storecfg);
            # check permissions for the storage
            my $rpcenv = PVE::RPCEnvironment::get();
            if ($rpcenv->{type} ne 'cli') {
                my $authuser = $rpcenv->get_user();
                $rpcenv->check($authuser, "/storage/$statestorage", ['Datastore.AllocateSpace']);
            }
        }

        $vmstate = PVE::QemuConfig->__snapshot_save_vmstate(
            $vmid, $conf, "suspend-$today", $storecfg, $statestorage, 1);
        $path = PVE::Storage::path($storecfg, $vmstate);
        PVE::QemuConfig->write_config($vmid, $conf);
    };

    PVE::QemuConfig->lock_config($vmid, $prepare);

    if ($includestate) {
        # save vm state
        PVE::Storage::activate_volumes($storecfg, [$vmstate]);

        eval {
            set_migration_caps($vmid, 1);
            mon_cmd($vmid, "savevm-start", statefile => $path);

            # poll until the state has been written completely
            while (1) {
                my $state = mon_cmd($vmid, "query-savevm");
                my $status = $state->{status} or die "savevm not active\n";

                if ($status eq 'active') {
                    sleep(1);
                    next;
                }
                if ($status eq 'completed') {
                    print "State saved, quitting\n";
                    last;
                }

                die "query-savevm failed with error '$state->{error}'\n"
                    if $status eq 'failed' && $state->{error};
                die "query-savevm returned status '$status'\n";
            }
        };
        my $err = $@;

        my $finish = sub {
            $conf = PVE::QemuConfig->load_config($vmid);

            if ($err) {
                # cleanup, but leave suspending lock, to indicate something went wrong
                eval {
                    mon_cmd($vmid, "savevm-end");
                    PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
                    PVE::Storage::vdisk_free($storecfg, $vmstate);
                    delete $conf->@{qw(vmstate runningmachine runningcpu)};
                    PVE::QemuConfig->write_config($vmid, $conf);
                };
                warn $@ if $@;
                die $err;
            }

            PVE::QemuConfig->has_lock($conf, 'suspending')
                or die "lock changed unexpectedly\n";

            mon_cmd($vmid, "quit");
            $conf->{lock} = 'suspended';
            PVE::QemuConfig->write_config($vmid, $conf);
        };

        PVE::QemuConfig->lock_config($vmid, $finish);
    }
}
5929
# Resume a paused or suspended guest while holding the config lock.
#
# Nothing is done if QEMU reports the guest as running. A guest in
# 'suspended' state is woken up, any other one is continued; a guest in
# 'shutdown' state is reset first. Unless $nocheck is set the config is
# loaded and its lock checked (skipped with $skiplock or for a 'backup' lock).
sub vm_resume {
    my ($vmid, $skiplock, $nocheck) = @_;

    my $resume_locked = sub {
        my $res = mon_cmd($vmid, 'query-status');
        my $status = $res->{status} || '';

        return if $status eq 'running'; # job done, go home

        my $resume_cmd = $status eq 'suspended' ? 'system_wakeup' : 'cont';
        my $reset = $status eq 'shutdown' ? 1 : 0;

        unless ($nocheck) {
            my $conf = PVE::QemuConfig->load_config($vmid);

            PVE::QemuConfig->check_lock($conf)
                unless $skiplock || PVE::QemuConfig->has_lock($conf, 'backup');
        }

        # required if a VM shuts down during a backup and we get a resume
        # request before the backup finishes for example
        mon_cmd($vmid, "system_reset") if $reset;

        mon_cmd($vmid, $resume_cmd);
    };

    PVE::QemuConfig->lock_config($vmid, $resume_locked);
}
5960
# Send a key event to a VM while holding its config lock.
#
# $vmid:     ID of the VM
# $skiplock: accepted but not used by this function
# $key:      key name, appended verbatim to the monitor 'sendkey' command
#
# Dies with the monitor output if the monitor answers with anything but an
# empty string.
sub vm_sendkey {
    my ($vmid, $skiplock, $key) = @_;

    my $send = sub {
        # the config itself is not used, it is only loaded
        my $conf = PVE::QemuConfig->load_config($vmid);

        # there is no qmp command, so we use the human monitor command
        my $hmp_command = "sendkey $key";
        my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, $hmp_command);
        die $output if $output ne '';
    };

    PVE::QemuConfig->lock_config($vmid, $send);
}
5973
5974 # vzdump restore implementation
5975
# Return the name of the first member listed by `tar tf` for an archive.
#
# $archive: path of the tar archive, must be an existing plain file
#
# Returns the first line printed by tar, without the trailing newline.
# Dies if the file does not exist, if tar cannot be started or if tar lists
# no member at all.
sub tar_archive_read_firstfile {
    my $archive = shift;

    die "ERROR: file '$archive' does not exist\n" if ! -f $archive;

    # try to detect archive type first
    # (list form of the pipe open, so $archive is passed to tar as one argument)
    my $pid = open(my $fh, '-|', 'tar', 'tf', $archive)
        or die "unable to open file '$archive' - $!\n";
    my $firstfile = <$fh>;

    # only the first entry is of interest, so stop tar right away
    kill 15, $pid;
    close $fh;

    die "ERROR: archive contains no data\n" if !$firstfile;
    chomp $firstfile;

    return $firstfile;
}
5993
# Remove the volumes recorded in a restore stat file.
#
# $storecfg: storage configuration, passed on to PVE::Storage::vdisk_free
# $statfile: path of the stat file; every line is expected to end in
#            'vzdump:<name>:<volid>'
#
# Volumes starting with '/' are treated as plain files and unlinked, all
# others are freed through the storage layer. Errors for a single volume are
# only reported on STDERR, the remaining lines are still processed. A stat
# file that cannot be opened is silently ignored.
sub tar_restore_cleanup {
    my ($storecfg, $statfile) = @_;

    print STDERR "starting cleanup\n";

    if (my $fd = IO::File->new($statfile, "r")) {
        while (defined(my $line = <$fd>)) {
            if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                my $volid = $2;
                eval {
                    if ($volid =~ m|^/|) {
                        # 'or' (not '||') so that the die guards unlink itself
                        # and not its argument
                        unlink($volid) or die "unlink failed - $!\n";
                    } else {
                        PVE::Storage::vdisk_free($storecfg, $volid);
                    }
                    print STDERR "temporary volume '$volid' successfully removed\n";
                };
                print STDERR "unable to cleanup '$volid' - $@" if $@;
            } else {
                print STDERR "unable to parse line in statfile - $line";
            }
        }
        $fd->close();
    }
}
6019
# Dispatch a restore to the handler matching the archive format.
#
# $archive: path of the archive, or '-' to read a VMA stream from STDIN
# $vmid:    ID of the VM to restore
# $user:    user ID, passed on to the format specific handler
# $opts:    restore options; 'format' overrides the detected archive format
#
# Returns whatever the selected handler returns.
sub restore_file_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if ($archive ne '-') {
        # try to detect archive format
        my $info = PVE::Storage::archive_info($archive);
        my $comp = $info->{compression};
        my $format = $opts->{format} // $info->{format};

        return $format eq 'tar'
            ? restore_tar_archive($archive, $vmid, $user, $opts)
            : restore_vma_archive($archive, $vmid, $user, $opts, $comp);
    }

    # a stream on STDIN is always handled as VMA, without compression info
    return restore_vma_archive($archive, $vmid, $user, $opts);
}
6037
# Helper to remove disks of the old config that will not be used after restore
#
# $storecfg: Storage configuration
# $vmid: ID of the VM being restored, only volumes owned by it are touched
# $oldconf: the config that is about to be replaced
# $virtdev_hash: the drives contained in the backup, keyed by drive name
my $restore_cleanup_oldconf = sub {
    my ($storecfg, $vmid, $oldconf, $virtdev_hash) = @_;

    # freeing is best effort, failures are only warned about
    my $free_volume = sub {
        my ($volid) = @_;
        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        warn $@ if $@;
    };

    PVE::QemuConfig->foreach_volume($oldconf, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive, 1);

        # skip empty drives and absolute paths
        my $volid = $drive->{file};
        return unless $volid && $volid !~ m|^/|;

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return unless $path && $owner && $owner == $vmid;

        # Note: only delete disk we want to restore
        # other volumes will become unused
        $free_volume->($volid) if $virtdev_hash->{$ds};
    });

    # delete vmstate files, after the restore we have no snapshots anymore
    foreach my $snapname (keys %{$oldconf->{snapshots}}) {
        my $snap = $oldconf->{snapshots}->{$snapname};
        $free_volume->($snap->{vmstate}) if $snap->{vmstate};
    }
};
6074
# Helper to parse vzdump backup device hints
#
# $rpcenv: Environment, used to check storage permissions
# $user: User ID, to check storage permissions
# $storecfg: Storage configuration
# $fh: the file handle for reading the configuration
# $devinfo: should contain device sizes for all backed-up devices
# $options: backup options (pool, default storage)
#
# Return: $virtdev_hash, updates $devinfo (add devname, virtdev, format, storeid)
my $parse_backup_hints = sub {
    my ($rpcenv, $user, $storecfg, $fh, $devinfo, $options) = @_;

    my $virtdev_hash = {};

    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\#qmdump\#map:(\S+):(\S+):(\S*):(\S*):$/) {
            my ($virtdev, $devname, $storeid, $format) = ($1, $2, $3, $4);

            my $info = $devinfo->{$devname};
            die "archive does not contain data for drive '$virtdev'\n" if !$info;

            # an explicitly requested storage wins over the one from the
            # backup, 'local' is the last resort in both cases
            my $requested = $options->{storage};
            $storeid = $requested if defined($requested);
            $storeid ||= 'local';
            $format ||= 'raw';

            @{$info}{qw(devname virtdev format storeid)} =
                ($devname, $virtdev, $format, $storeid);

            # check permission on storage
            $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace'])
                if $user ne 'root@pam';

            $virtdev_hash->{$virtdev} = $info;
            next;
        }

        next if $line !~ m/^((?:ide|sata|scsi)\d+):\s*(.*)\s*$/;

        # plain drive lines only matter for cloudinit disks, which are not
        # part of the archive and get re-created instead
        my $virtdev = $1;
        my $drive = parse_drive($virtdev, $2);
        next if !drive_is_cloudinit($drive);

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        $storeid = $options->{storage} if defined($options->{storage});
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        $virtdev_hash->{$virtdev} = {
            format => qemu_img_format($scfg, $volname), # has 'raw' fallback
            storeid => $storeid,
            size => PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE,
            is_cloudinit => 1,
        };
    }

    return $virtdev_hash;
};
6135
# Helper to allocate and activate all volumes required for a restore
#
# $storecfg: Storage configuration
# $virtdev_hash: as returned by parse_backup_hints()
# $vmid: ID of the VM the volumes are allocated for
#
# Every entry of $virtdev_hash gets its 'volid' set; its 'format' is replaced
# by the storage default if the storage does not support the requested one.
#
# Returns: { $virtdev => $volid }
my $restore_allocate_devices = sub {
    my ($storecfg, $virtdev_hash, $vmid) = @_;

    my $map = {};
    foreach my $virtdev (sort keys %$virtdev_hash) {
        my $d = $virtdev_hash->{$virtdev};
        # size is rounded up to full units of 1024
        my $alloc_size = int(($d->{size} + 1024 - 1)/1024);
        my $storeid = $d->{storeid};
        # looked up once per device and reused for the cloudinit name below
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        # test if requested format is supported
        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        my $supported = grep { $_ eq $d->{format} } @$validFormats;
        $d->{format} = $defFormat if !$supported;

        # only cloudinit disks get a fixed name, for all other disks the
        # name is left undefined
        my $name;
        if ($d->{is_cloudinit}) {
            $name = "vm-$vmid-cloudinit";
            # storages with a 'path' get the format appended as extension
            $name .= ".$d->{format}" if $scfg->{path};
        }

        my $volid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $vmid, $d->{format}, $name, $alloc_size);

        print STDERR "new volume ID is '$volid'\n";
        $d->{volid} = $volid;

        PVE::Storage::activate_volumes($storecfg, [$volid]);

        $map->{$virtdev} = $volid;
    }

    return $map;
};
6179
# Translate one line of a backed-up VM config for the restored VM.
#
# $cookie: state shared between calls, 'netcount' is the index used for the
#          next network device converted from an old 'vlanN' line
# $map:    { $virtdev => $volid } of the newly allocated volumes
# $line:   the config line, including its line terminator
# $unique: if true, MAC addresses and the SMBIOS UUID are regenerated
#
# Returns the text to put into the new config, '' if the line is dropped.
sub restore_update_config_line {
    my ($cookie, $map, $line, $unique) = @_;

    # backup hints, locks, unused disks and snapshot parents are never restored
    foreach my $dropped (qr/^\#qmdump\#/, qr/^\#vzdump\#/, qr/^lock:/, qr/^unused\d+:/, qr/^parent:/) {
        return '' if $line =~ $dropped;
    }

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');

    if ($line =~ m/^(vlan(\d+)):\s*(\S+)\s*$/) {
        # try to convert old 1.X settings
        my ($bridge_index, $ethcfg) = ($2, $3);

        my $converted = '';
        foreach my $devconfig (PVE::Tools::split_list($ethcfg)) {
            my ($model, $macaddr) = split(/\=/, $devconfig);
            if (!$macaddr || $unique) {
                $macaddr = PVE::Tools::random_ether_addr($dc->{mac_prefix});
            }
            my $netstr = print_net({
                model => $model,
                bridge => "vmbr$bridge_index",
                macaddr => $macaddr,
            });

            $converted .= "net$cookie->{netcount}: $netstr\n";
            $cookie->{netcount}++;
        }
        return $converted;
    }

    if ($unique && $line =~ m/^(net\d+):\s*(\S+)\s*$/) {
        my ($id, $netstr) = ($1, $2);
        my $net = parse_net($netstr);
        # only replace a MAC address that was actually configured
        $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if $net->{macaddr};
        $netstr = print_net($net);
        return "$id: $netstr\n";
    }

    if ($line =~ m/^((ide|scsi|virtio|sata|efidisk|tpmstate)\d+):\s*(\S+)\s*$/) {
        my ($virtdev, $value) = ($1, $3);
        my $di = parse_drive($virtdev, $value);

        # drives with backup explicitly disabled are kept as a comment
        return "#$line" if defined($di->{backup}) && !$di->{backup};

        # drives without a newly allocated volume stay untouched
        return $line if !$map->{$virtdev};

        delete $di->{format}; # format can change on restore
        $di->{file} = $map->{$virtdev};
        return "$virtdev: " . print_drive($di) . "\n";
    }

    if ($line =~ m/^vmgenid: (.*)/) {
        my $vmgenid = $1;
        # always generate a new vmgenid if there was a valid one setup
        $vmgenid = generate_uuid() unless $vmgenid eq '0';
        return "vmgenid: $vmgenid\n";
    }

    if ($unique && $line =~ m/^(smbios1: )(.*)/) {
        my ($key, $raw) = ($1, $2);

        my ($uuid, $uuid_str);
        UUID::generate($uuid);
        UUID::unparse($uuid, $uuid_str);

        my $smbios1 = parse_smbios1($raw);
        $smbios1->{uuid} = $uuid_str;
        return $key . print_smbios1($smbios1) . "\n";
    }

    return $line;
}
6248
# Helper to deactivate all volumes that were allocated during a restore
#
# $storecfg: Storage configuration
# $devinfo: device info hash, entries without a 'volid' are ignored
my $restore_deactivate_volumes = sub {
    my ($storecfg, $devinfo) = @_;

    my @allocated = grep { $_ } map { $devinfo->{$_}->{volid} } keys %$devinfo;

    PVE::Storage::deactivate_volumes($storecfg, \@allocated);
};
6260
# Helper to remove all volumes that were allocated during a failed restore
#
# $storecfg: Storage configuration
# $devinfo: device info hash, entries without a 'volid' are ignored
#
# Volumes starting with '/' are treated as plain files and unlinked, all
# others are freed through the storage layer. Errors for a single volume are
# only reported on STDERR, the remaining volumes are still processed.
my $restore_destroy_volumes = sub {
    my ($storecfg, $devinfo) = @_;

    foreach my $devname (keys %$devinfo) {
        my $volid = $devinfo->{$devname}->{volid};
        next if !$volid;
        eval {
            if ($volid =~ m|^/|) {
                # 'or' (not '||') so that the die guards unlink itself and
                # not its argument
                unlink($volid) or die "unlink failed - $!\n";
            } else {
                PVE::Storage::vdisk_free($storecfg, $volid);
            }
            print STDERR "temporary volume '$volid' successfully removed\n";
        };
        print STDERR "unable to cleanup '$volid' - $@" if $@;
    }
};
6278
# Collect the image volumes reported by the storage layer.
#
# $cfg:  storage configuration
# $vmid: passed on to PVE::Storage::vdisk_list as VM filter (may be undef)
#
# Returns a hash { $volid => $item } of all listed items that have both a
# 'volid' and a true 'size'; every returned item additionally gets its 'path'.
sub scan_volids {
    my ($cfg, $vmid) = @_;

    my $listing = PVE::Storage::vdisk_list($cfg, undef, $vmid, undef, 'images');

    my %item_of;
    for my $storeid (keys %$listing) {
        for my $entry (@{$listing->{$storeid}}) {
            my $volid = $entry->{volid};
            next unless $volid && $entry->{size};

            # scalar assignment, so only the path itself is stored
            $entry->{path} = PVE::Storage::path($cfg, $volid);
            $item_of{$volid} = $entry;
        }
    }

    return \%item_of;
}
6295
# Synchronize the disk entries of a VM config with the volumes found on the
# storages.
#
# $vmid:       ID of the VM, used for messages and to skip its state volumes
# $conf:       VM config, modified in place
# $volid_hash: { $volid => $info } of existing volumes, with 'path' and 'size'
#
# Sizes of configured drives are updated, 'unusedX' entries pointing to a
# volume that is in use get removed and volumes that are not referenced at all
# get added as unused. Every change is printed.
#
# Returns 1 if $conf was changed, undef otherwise.
sub update_disk_config {
    my ($vmid, $conf, $volid_hash) = @_;

    my $changes;
    my $prefix = "VM $vmid";

    # Note: it is allowed to define multiple storages with same path (alias), so
    # we need to check both 'volid' and real 'path' (two different volid can point
    # to the same path).
    my %seen_volid; # used and unused disks
    my %seen_path;

    # update size info
    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;
        my $volume = $volid_hash->{$volid};

        # mark volid as "in-use" for next step
        $seen_volid{$volid} = 1;
        if ($volume) {
            my $path = $volume->{path};
            $seen_path{$path} = 1 if $path;
        }

        return if drive_is_cdrom($drive) || !$volume;

        my ($updated, $msg) = PVE::QemuServer::Drive::update_disksize($drive, $volume->{size});
        return if !defined($updated);

        $changes = 1;
        $conf->{$opt} = print_drive($updated);
        print "$prefix ($opt): $msg\n";
    });

    # remove 'unusedX' entry if volume is used
    PVE::QemuConfig->foreach_unused_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $known = $volid_hash->{$volid};
        my $path = $known ? $known->{path} : undef;

        my $in_use = $seen_volid{$volid} || ($path && $seen_path{$path});
        if ($in_use) {
            print "$prefix remove entry '$opt', its volume '$volid' is in use\n";
            $changes = 1;
            delete $conf->{$opt};
        }

        $seen_volid{$volid} = 1;
        $seen_path{$path} = 1 if $path;
    });

    # everything still unseen is added as unused volume
    foreach my $volid (sort keys %$volid_hash) {
        next if $volid =~ m/vm-$vmid-state-/ || $seen_volid{$volid};

        my $path = $volid_hash->{$volid}->{path};
        next if !$path; # just to be sure
        next if $seen_path{$path};

        $changes = 1;
        my $key = PVE::QemuConfig->add_unused_volume($conf, $volid);
        print "$prefix add unreferenced volume '$volid' as '$key' to config\n";
        $seen_path{$path} = 1; # avoid to add more than once (aliases)
    }

    return $changes;
}
6369
# Rescan the storages and update the disk entries of one or all VM configs.
#
# $vmid:   ID of the VM to update; if undefined, all VMs returned by
#          config_list() are processed
# $nolock: if true, the config is updated without taking the config lock
# $dryrun: if true, changes are only printed but not written
sub rescan {
    my ($vmid, $nolock, $dryrun) = @_;

    my $cfg = PVE::Storage::config();

    print "rescan volumes...\n";
    my $volid_hash = scan_volids($cfg, $vmid);

    my $sync_config = sub {
        my ($id) = @_;

        my $conf = PVE::QemuConfig->load_config($id);

        PVE::QemuConfig->check_lock($conf);

        # only consider the volumes owned by this VM
        my %owned;
        for my $volid (keys %$volid_hash) {
            my $info = $volid_hash->{$volid};
            next unless $info->{vmid} && $info->{vmid} == $id;
            $owned{$volid} = $info;
        }

        my $changes = update_disk_config($id, $conf, \%owned);

        PVE::QemuConfig->write_config($id, $conf) if $changes && !$dryrun;
    };

    my @targets = defined($vmid) ? ($vmid) : keys %{ config_list() };

    for my $id (@targets) {
        if ($nolock) {
            $sync_config->($id);
        } else {
            PVE::QemuConfig->lock_config($id, $sync_config, $id);
        }
    }
}
6413
# Restore a VM from a Proxmox Backup Server snapshot.
#
# $archive: volume ID of the backup; must parse as vtype 'backup' with
#           format 'pbs-vm'
# $vmid:    ID of the VM to restore
# $user:    user ID, passed on to the backup hint parser for permission checks
# $options: restore options; 'unique', 'pool' and 'live' are read here, the
#           whole hash is also passed on to the backup hint parser
#
# The config and all drive images are fetched into newly allocated volumes.
# With $options->{live} only 'efidisk0' and 'tpmstate0' are restored up
# front, the VM is then started and the remaining drives are handled by
# pbs_live_restore() while the 'create' lock is kept.
#
# Dies on error, after deactivating and removing the volumes allocated so far.
sub restore_proxmox_backup_archive {
    my ($archive, $vmid, $user, $options) = @_;

    my $storecfg = PVE::Storage::config();

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($archive);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

    my $fingerprint = $scfg->{fingerprint};
    my $keyfile = PVE::Storage::PBSPlugin::pbs_encryption_key_file_name($storecfg, $storeid);

    my $repo = PVE::PBSClient::get_repository($scfg);

    # This is only used for `pbs-restore` and the QEMU PBS driver (live-restore)
    my $password = PVE::Storage::PBSPlugin::pbs_get_password($scfg, $storeid);
    local $ENV{PBS_PASSWORD} = $password;
    local $ENV{PBS_FINGERPRINT} = $fingerprint if defined($fingerprint);

    my ($vtype, $pbs_backup_name, undef, undef, undef, undef, $format) =
        PVE::Storage::parse_volname($storecfg, $archive);

    die "got unexpected vtype '$vtype'\n" if $vtype ne 'backup';

    die "got unexpected backup format '$format'\n" if $format ne 'pbs-vm';

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;
    mkpath $tmpdir;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    # Note: $oldconf is undef if VM does not exists
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my $rpcenv = PVE::RPCEnvironment::get();
    my $devinfo = {};

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        my $cfgfn = "$tmpdir/qemu-server.conf";
        my $firewall_config_fn = "$tmpdir/fw.conf";
        my $index_fn = "$tmpdir/index.json";

        my $cmd = "restore";

        my $param = [$pbs_backup_name, "index.json", $index_fn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);
        my $index = PVE::Tools::file_get_contents($index_fn);
        $index = decode_json($index);

        # collect the size of every drive image listed in the index
        foreach my $info (@{$index->{files}}) {
            # the dots are escaped, so only a literal '.img.fidx' suffix matches
            if ($info->{filename} =~ m/^(drive-\S+)\.img\.fidx$/) {
                my $devname = $1;
                if ($info->{size} =~ m/^(\d+)$/) { # untaint size
                    $devinfo->{$devname}->{size} = $1;
                } else {
                    die "unable to parse file size in 'index.json' - got '$info->{size}'\n";
                }
            }
        }

        my $is_qemu_server_backup = scalar(
            grep { $_->{filename} eq 'qemu-server.conf.blob' } @{$index->{files}}
        );
        if (!$is_qemu_server_backup) {
            die "backup does not look like a qemu-server backup (missing 'qemu-server.conf' file)\n";
        }
        my $has_firewall_config = scalar(grep { $_->{filename} eq 'fw.conf.blob' } @{$index->{files}});

        $param = [$pbs_backup_name, "qemu-server.conf", $cfgfn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

        if ($has_firewall_config) {
            $param = [$pbs_backup_name, "fw.conf", $firewall_config_fn];
            PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($firewall_config_fn, "${pve_firewall_dir}/$vmid.fw");
        }

        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        my $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $storecfg, $fh, $devinfo, $options);

        # fixme: rate limit?

        # create empty/temp config
        PVE::Tools::file_set_contents($conffile, "memory: 128\nlock: create");

        $restore_cleanup_oldconf->($storecfg, $vmid, $oldconf, $virtdev_hash) if $oldconf;

        # allocate volumes
        my $map = $restore_allocate_devices->($storecfg, $virtdev_hash, $vmid);

        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            # this fails if storage is unavailable
            my $volid = $d->{volid};
            my $path = PVE::Storage::path($storecfg, $volid);

            # for live-restore we only want to preload the efidisk and TPM state
            next if $options->{live} && $virtdev ne 'efidisk0' && $virtdev ne 'tpmstate0';

            my $pbs_restore_cmd = [
                '/usr/bin/pbs-restore',
                '--repository', $repo,
                $pbs_backup_name,
                "$d->{devname}.img.fidx",
                $path,
                '--verbose',
            ];

            push @$pbs_restore_cmd, '--format', $d->{format} if $d->{format};
            push @$pbs_restore_cmd, '--keyfile', $keyfile if -e $keyfile;

            if (PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $volid)) {
                push @$pbs_restore_cmd, '--skip-zero';
            }

            my $dbg_cmdstring = PVE::Tools::cmd2string($pbs_restore_cmd);
            print "restore proxmox backup image: $dbg_cmdstring\n";
            run_command($pbs_restore_cmd);
        }

        # second pass over the config: translate it for the restored VM
        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $options->{unique},
            );
        }

        $fh->close();
    };
    my $err = $@;

    # for a successful live-restore the volumes must stay active
    if ($err || !$options->{live}) {
        $restore_deactivate_volumes->($storecfg, $devinfo);
    }

    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($storecfg, $devinfo);
        die $err;
    }

    if ($options->{live}) {
        # keep lock during live-restore
        $new_conf_raw .= "\nlock: create";
    }

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $options->{pool}) if $options->{pool};

    if ($options->{live}) {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "got signal ($!) - abort\n"; };

        my $conf = PVE::QemuConfig->load_config($vmid);
        die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf);

        # these special drives are already restored before start
        delete $devinfo->{'drive-efidisk0'};
        delete $devinfo->{'drive-tpmstate0-backup'};
        pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $repo, $keyfile, $pbs_backup_name);

        PVE::QemuConfig->remove_lock($vmid, "create");
    }
}
6616
# Start a VM on top of a Proxmox Backup Server snapshot and stream the
# backed-up data into its disks while it is running.
#
# $vmid:           ID of the VM
# $conf:           VM config, used to look up the target drives
# $storecfg:       storage configuration
# $restored_disks: hash keyed by device name ('drive-<config key>') of the
#                  disks that still have to be restored
# $repo:           PBS repository string
# $keyfile:        path of the encryption key file, only used if it exists
# $snap:           name of the backup snapshot
#
# Dies with "live-restore failed" after stopping the VM if anything goes
# wrong once the restore was started, and with a descriptive error if a
# device name does not have the expected 'drive-' prefix.
sub pbs_live_restore {
    my ($vmid, $conf, $storecfg, $restored_disks, $repo, $keyfile, $snap) = @_;

    print "starting VM for live-restore\n";
    print "repository: '$repo', snapshot: '$snap'\n";

    my $pbs_backing = {};
    for my $ds (keys %$restored_disks) {
        # check the match, a failed one would leave a stale $1 behind
        $ds =~ m/^drive-(.*)$/
            or die "unexpected device name '$ds' - expected 'drive-<name>'\n";
        my $confname = $1;

        $pbs_backing->{$confname} = {
            repository => $repo,
            snapshot => $snap,
            archive => "$ds.img.fidx",
        };
        $pbs_backing->{$confname}->{keyfile} = $keyfile if -e $keyfile;

        my $drive = parse_drive($confname, $conf->{$confname});
        print "restoring '$ds' to '$drive->{file}'\n";
    }

    my $drives_streamed = 0;
    eval {
        # make sure HA doesn't interrupt our restore by stopping the VM
        if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
            run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
        }

        # start VM with backing chain pointing to PBS backup, environment vars for PBS driver
        # in QEMU (PBS_PASSWORD and PBS_FINGERPRINT) are already set by our caller
        vm_start_nolock($storecfg, $vmid, $conf, {paused => 1, 'pbs-backing' => $pbs_backing}, {});

        my $qmeventd_fd = register_qmeventd_handle($vmid);

        # begin streaming, i.e. data copy from PBS to target disk for every vol,
        # this will effectively collapse the backing image chain consisting of
        # [target <- alloc-track -> PBS snapshot] to just [target] (alloc-track
        # removes itself once all backing images vanish with 'auto-remove=on')
        my $jobs = {};
        for my $ds (sort keys %$restored_disks) {
            my $job_id = "restore-$ds";
            mon_cmd($vmid, 'block-stream',
                'job-id' => $job_id,
                device => "$ds",
            );
            $jobs->{$job_id} = {};
        }

        mon_cmd($vmid, 'cont');
        qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');

        print "restore-drive jobs finished successfully, removing all tracking block devices"
            ." to disconnect from Proxmox Backup Server\n";

        for my $ds (sort keys %$restored_disks) {
            mon_cmd($vmid, 'blockdev-del', 'node-name' => "$ds-pbs");
        }

        close($qmeventd_fd);
    };

    my $err = $@;

    if ($err) {
        warn "An error occurred during live-restore: $err\n";
        _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
        die "live-restore failed\n";
    }
}
6686
# Restore a VM from a VMA archive by piping it through `vma extract`.
#
# $archive: path of the archive, or '-' to read it from STDIN
# $vmid:    ID of the VM to restore
# $user:    user ID, passed on to the backup hint parser for permission checks
# $opts:    restore options; 'bwlimit', 'unique' and 'pool' are read here, the
#           whole hash is also passed on to the backup hint parser
# $comp:    compression of the archive, if any; selects the decompressor that
#           is put in front of `vma extract`
#
# `vma extract` first writes the config into a temporary directory and then
# waits for the device map on a fifo. The map is written as soon as the
# 'CTIME:' line shows up in its output, after the target volumes have been
# allocated.
#
# Dies on error, after deactivating and removing the volumes allocated so far.
sub restore_vma_archive {
    my ($archive, $vmid, $user, $opts, $comp) = @_;

    my $readfrom = $archive;

    my $cfg = PVE::Storage::config();
    my $commands = [];
    my $bwlimit = $opts->{bwlimit};

    my $dbg_cmdstring = '';
    # append a command to the pipeline, all later commands read from STDIN
    my $add_pipe = sub {
        my ($cmd) = @_;
        push @$commands, $cmd;
        $dbg_cmdstring .= ' | ' if length($dbg_cmdstring);
        $dbg_cmdstring .= PVE::Tools::cmd2string($cmd);
        $readfrom = '-';
    };

    my $input = undef;
    if ($archive eq '-') {
        $input = '<&STDIN';
    } else {
        # If we use a backup from a PVE defined storage we also consider that
        # storage's rate limit:
        my (undef, $volid) = PVE::Storage::path_to_volume_id($cfg, $archive);
        if (defined($volid)) {
            my ($sid, undef) = PVE::Storage::parse_volume_id($volid);
            my $readlimit = PVE::Storage::get_bandwidth_limit('restore', [$sid], $bwlimit);
            if ($readlimit) {
                print STDERR "applying read rate limit: $readlimit\n";
                my $cstream = ['cstream', '-t', $readlimit*1024, '--', $readfrom];
                $add_pipe->($cstream);
            }
        }
    }

    if ($comp) {
        my $info = PVE::Storage::decompressor_info('vma', $comp);
        my $cmd = $info->{decompressor};
        push @$cmd, $readfrom;
        $add_pipe->($cmd);
    }

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { warn "got interrupt - ignored\n"; };

    my $mapfifo = "/var/tmp/vzdumptmp$$.fifo";
    POSIX::mkfifo($mapfifo, 0600);
    my $fifofh;
    # passed to run_command as 'afterfork' callback
    my $openfifo = sub { open($fifofh, '>', $mapfifo) or die $! };

    $add_pipe->(['vma', 'extract', '-v', '-r', $mapfifo, $readfrom, $tmpdir]);

    my $oldtimeout;
    my $timeout = 5;

    my $devinfo = {};

    my $rpcenv = PVE::RPCEnvironment::get();

    my $conffile = PVE::QemuConfig->config_file($vmid);

    # Note: $oldconf is undef if VM does not exist
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my %storage_limits;

    # allocate the target volumes, write the device map to the fifo and
    # build the new config
    my $print_devmap = sub {
        my $cfgfn = "$tmpdir/qemu-server.conf";

        # we can read the config - that is already extracted
        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        my $fwcfgfn = "$tmpdir/qemu-server.fw";
        if (-f $fwcfgfn) {
            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($fwcfgfn, "${pve_firewall_dir}/$vmid.fw");
        }

        my $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $cfg, $fh, $devinfo, $opts);

        # look up the restore rate limit once per target storage
        foreach my $info (values %{$virtdev_hash}) {
            my $storeid = $info->{storeid};
            next if defined($storage_limits{$storeid});

            my $limit = PVE::Storage::get_bandwidth_limit('restore', [$storeid], $bwlimit) // 0;
            print STDERR "rate limit for storage $storeid: $limit KiB/s\n" if $limit;
            $storage_limits{$storeid} = $limit * 1024;
        }

        foreach my $devname (keys %$devinfo) {
            die "found no device mapping information for device '$devname'\n"
                if !$devinfo->{$devname}->{virtdev};
        }

        # create empty/temp config
        if ($oldconf) {
            PVE::Tools::file_set_contents($conffile, "memory: 128\n");
            $restore_cleanup_oldconf->($cfg, $vmid, $oldconf, $virtdev_hash);
        }

        # allocate volumes
        my $map = $restore_allocate_devices->($cfg, $virtdev_hash, $vmid);

        # print restore information to $fifofh
        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            my $storeid = $d->{storeid};
            my $volid = $d->{volid};

            my $map_opts = '';
            if (my $limit = $storage_limits{$storeid}) {
                $map_opts .= "throttling.bps=$limit:throttling.group=$storeid:";
            }

            my $write_zeros = 1;
            if (PVE::Storage::volume_has_feature($cfg, 'sparseinit', $volid)) {
                $write_zeros = 0;
            }

            my $path = PVE::Storage::path($cfg, $volid);

            print $fifofh "${map_opts}format=$d->{format}:${write_zeros}:$d->{devname}=$path\n";

            print "map '$d->{devname}' to '$path' (write zeros = ${write_zeros})\n";
        }

        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $fh->close();
    };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };
        local $SIG{ALRM} = sub { die "got timeout\n"; };

        $oldtimeout = alarm($timeout);

        my $parser = sub {
            my $line = shift;

            print "$line\n";

            if ($line =~ m/^DEV:\sdev_id=(\d+)\ssize:\s(\d+)\sdevname:\s(\S+)$/) {
                my ($dev_id, $size, $devname) = ($1, $2, $3);
                $devinfo->{$devname} = { size => $size, dev_id => $dev_id };
            } elsif ($line =~ m/^CTIME: /) {
                # we correctly received the vma config, so we can disable
                # the timeout now for disk allocation (set to 10 minutes, so
                # that we always timeout if something goes wrong)
                alarm(600);
                $print_devmap->();
                print $fifofh "done\n";
                # the previous alarm is restored here, $oldtimeout is cleared
                # so that it is not restored a second time below
                my $tmp = $oldtimeout || 0;
                $oldtimeout = undef;
                alarm($tmp);
                close($fifofh);
                $fifofh = undef;
            }
        };

        print "restore vma archive: $dbg_cmdstring\n";
        run_command($commands, input => $input, outfunc => $parser, afterfork => $openfifo);
    };
    my $err = $@;

    # $oldtimeout is still defined if the eval died before the previous alarm
    # was restored. It has to be restored even if it is 0 (no previous
    # alarm), otherwise our own alarm stays armed while the ALRM handler
    # from above is already gone.
    alarm($oldtimeout) if defined($oldtimeout);

    $restore_deactivate_volumes->($cfg, $devinfo);

    close($fifofh) if $fifofh;
    unlink $mapfifo;
    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($cfg, $devinfo);
        die $err;
    }

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $opts->{pool}) if $opts->{pool};
}
6902
# Restore a VM from a tar based vzdump archive.
#
# $archive: path of the archive, or '-' to read it from STDIN
# $vmid:    ID of the VM to restore
# $user:    user ID, exported to the extract helper as VZDUMP_USER
# $opts:    restore options; 'storage', 'pool', 'prealloc' and 'info' are
#           passed to the extract helper, 'unique' is used when the config
#           is translated
#
# The archive members are extracted by tar through
# /usr/lib/qemu-server/qmextract, which records the new volumes in
# 'qmrestore.stat' inside the temporary directory.
#
# Dies on error, after removing the volumes recorded so far (unless
# $opts->{info} is set) and the temporary directory.
sub restore_tar_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if ($archive ne '-') {
        my $firstfile = tar_archive_read_firstfile($archive);
        die "ERROR: file '$archive' does not look like a QemuServer vzdump backup\n"
            if $firstfile ne 'qemu-server.conf';
    }

    my $storecfg = PVE::Storage::config();

    # avoid zombie disks when restoring over an existing VM -> cleanup first
    # pass keep_empty_config=1 to keep the config (thus VMID) reserved for us
    # skiplock=1 because qmrestore has set the 'create' lock itself already
    my $vmcfgfn = PVE::QemuConfig->config_file($vmid);
    destroy_vm($storecfg, $vmid, 1, { lock => 'restore' }) if -f $vmcfgfn;

    my $tocmd = "/usr/lib/qemu-server/qmextract";

    $tocmd .= " --storage " . PVE::Tools::shellquote($opts->{storage}) if $opts->{storage};
    $tocmd .= " --pool " . PVE::Tools::shellquote($opts->{pool}) if $opts->{pool};
    $tocmd .= ' --prealloc' if $opts->{prealloc};
    $tocmd .= ' --info' if $opts->{info};

    # tar option "xf" does not autodetect compression when read from STDIN,
    # so we pipe to zcat
    my $cmd = "zcat -f|tar xf " . PVE::Tools::shellquote($archive) . " " .
        PVE::Tools::shellquote("--to-command=$tocmd");

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    mkpath $tmpdir;

    local $ENV{VZDUMP_TMPDIR} = $tmpdir;
    local $ENV{VZDUMP_VMID} = $vmid;
    local $ENV{VZDUMP_USER} = $user;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    my $new_conf_raw = '';

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        if ($archive eq '-') {
            print "extracting archive from STDIN\n";
            run_command($cmd, input => "<&STDIN");
        } else {
            print "extracting archive '$archive'\n";
            run_command($cmd);
        }

        # NOTE(review): this only leaves the eval block, the code after it
        # still runs and writes the (then empty) config - confirm this is
        # intended for 'info'
        return if $opts->{info};

        # read new mapping
        my $map = {};
        my $statfile = "$tmpdir/qmrestore.stat";
        if (my $fd = IO::File->new($statfile, "r")) {
            while (defined (my $line = <$fd>)) {
                if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                    $map->{$1} = $2 if $1;
                } else {
                    print STDERR "unable to parse line in statfile - $line\n";
                }
            }
            $fd->close();
        }

        my $confsrc = "$tmpdir/qemu-server.conf";

        my $srcfd = IO::File->new($confsrc, "r") || die "unable to open file '$confsrc'\n";

        my $cookie = { netcount => 0 };
        while (defined (my $line = <$srcfd>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $srcfd->close();
    };
    if (my $err = $@) {
        tar_restore_cleanup($storecfg, "$tmpdir/qmrestore.stat") if !$opts->{info};
        # the stat file is no longer needed, do not leave the directory behind
        rmtree $tmpdir;
        die $err;
    }

    rmtree $tmpdir;

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;
}
7010
# Call $func with the storage ID of every storage that is referenced by a non-CD-ROM volume in
# $conf. Each storage ID is passed exactly once, in sorted order.
sub foreach_storage_used_by_vm {
    my ($conf, $func) = @_;

    my %used_storage;

    my $collect_storage = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive);

        # volumes without a storage part (parse returns nothing) are not recorded
        my ($storeid) = PVE::Storage::parse_volume_id($drive->{file}, 1);
        $used_storage{$storeid} = $storeid if $storeid;
    };
    PVE::QemuConfig->foreach_volume($conf, $collect_storage);

    for my $storeid (sort keys %used_storage) {
        $func->($storeid);
    }
}
7030
# storage types checked by do_snapshots_with_qemu() below
my $qemu_snap_storage = {
    rbd => 1,
};

# Returns 1 if the volume is on a storage type listed in $qemu_snap_storage (without 'krbd' set)
# or if its name ends in '.qcow2'/'.qed'. Returns nothing otherwise, and always for a device ID
# containing 'tpmstate0'. Dies if the storage of $volid is not part of $storecfg.
sub do_snapshots_with_qemu {
    my ($storecfg, $volid, $deviceid) = @_;

    return if $deviceid =~ m/tpmstate0/;

    my $storage_name = PVE::Storage::parse_volume_id($volid);
    my $scfg = $storecfg->{ids}->{$storage_name};
    die "could not find storage '$storage_name'\n" if !defined($scfg);

    my $listed_storage = $qemu_snap_storage->{$scfg->{type}} && !$scfg->{krbd};
    return 1 if $listed_storage || $volid =~ m/\.(qcow2|qed)$/;

    return;
}
7053
# Send a 'guest-ping' (3 second timeout) to the VM. Returns 1 if the command succeeded and 0
# otherwise; in the failure case a warning is emitted unless $nowarn is set.
sub qga_check_running {
    my ($vmid, $nowarn) = @_;

    eval { mon_cmd($vmid, "guest-ping", timeout => 3); };
    my $err = $@;

    return 1 if !$err;

    warn "Qemu Guest Agent is not running - $err" if !$nowarn;
    return 0;
}
7064
# Convert the volumes of a VM into base volumes. CD-ROM drives and volumes whose storage lacks
# the 'template' feature are skipped. If $disk is given, only that drive is converted. The config
# is written after each converted volume.
sub template_create {
    my ($vmid, $conf, $disk) = @_;

    my $storecfg = PVE::Storage::config();

    my $convert_to_base = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive) || ($disk && $ds ne $disk);

        my $volid = $drive->{file};
        return if !PVE::Storage::volume_has_feature($storecfg, 'template', $volid);

        $drive->{file} = PVE::Storage::vdisk_create_base($storecfg, $volid);
        $conf->{$ds} = print_drive($drive);
        PVE::QemuConfig->write_config($vmid, $conf);
    };

    PVE::QemuConfig->foreach_volume($conf, $convert_to_base);
}
7085
# Translate an 'iscsi://<portal>/<target>/<lun>' path into the comma separated 'file.*' option
# string used together with qemu-img's --image-opts. Dies if $path has a different format.
sub convert_iscsi_path {
    my ($path) = @_;

    my ($portal, $target, $lun) = $path =~ m|^iscsi://([^/]+)/([^/]+)/(.+)$|
        or die "cannot convert iscsi path '$path', unkown format\n";

    my $initiator_name = get_initiator_name();

    return join(',',
        'file.driver=iscsi',
        'file.transport=tcp',
        "file.initiator-name=$initiator_name",
        "file.portal=$portal",
        "file.target=$target",
        "file.lun=$lun",
        'driver=raw',
    );
}
7102
# Copy a source image to an existing destination volume with 'qemu-img convert' and print the
# progress.
#
# $src_volid           - volume ID, or path of an existing file
# $dst_volid           - volume ID of the destination (must contain a storage ID)
# $size                - total size, only used to render the progress output
# $snapname            - optional snapshot to read from
# $is_zero_initialized - if set, the (non-iSCSI) target path is prefixed with 'zeroinit:'
#
# Dies if source or destination cannot be resolved or if the qemu-img command fails.
sub qemu_img_convert {
    my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized) = @_;

    my $storecfg = PVE::Storage::config();
    my ($src_storeid, $src_volname) = PVE::Storage::parse_volume_id($src_volid, 1);
    my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid, 1);

    die "destination '$dst_volid' is not a valid volid form qemu-img convert\n" if !$dst_storeid;

    my ($src_path, $src_format, $src_cachemode);
    my $src_is_iscsi = 0;

    if ($src_storeid) {
        # source is a storage volume
        PVE::Storage::activate_volumes($storecfg, [$src_volid], $snapname);
        my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
        $src_format = qemu_img_format($src_scfg, $src_volname);
        $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
        $src_is_iscsi = ($src_path =~ m|^iscsi://|);
        $src_cachemode = 'none' if $src_scfg->{type} eq 'zfspool';
    } elsif (-f $src_volid) {
        # source is a plain file, derive the format from its extension (if it has a known one)
        $src_path = $src_volid;
        ($src_format) = $src_path =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/;
    }

    die "source '$src_volid' is not a valid volid nor path for qemu-img convert\n" if !$src_path;

    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
    my $dst_format = qemu_img_format($dst_scfg, $dst_volname);
    my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
    my $dst_is_iscsi = ($dst_path =~ m|^iscsi://|);

    my @cmd = ('/usr/bin/qemu-img', 'convert', '-p', '-n');
    if ($snapname && $src_format && $src_format eq "qcow2") {
        push @cmd, '-l', "snapshot.name=$snapname";
    }
    push @cmd, '-t', 'none' if $dst_scfg->{type} eq 'zfspool';
    push @cmd, '-T', $src_cachemode if defined($src_cachemode);

    # iSCSI paths need to be passed as option strings instead of plain paths
    if ($src_is_iscsi) {
        push @cmd, '--image-opts';
        $src_path = convert_iscsi_path($src_path);
    } elsif ($src_format) {
        push @cmd, '-f', $src_format;
    }

    if ($dst_is_iscsi) {
        push @cmd, '--target-image-opts';
        $dst_path = convert_iscsi_path($dst_path);
    } else {
        push @cmd, '-O', $dst_format;
    }

    my $use_zeroinit = !$dst_is_iscsi && $is_zero_initialized;
    push @cmd, $src_path, ($use_zeroinit ? "zeroinit:$dst_path" : $dst_path);

    # turn the '(NN.NN/100%)' progress lines of qemu-img into a human readable status line
    my $print_progress = sub {
        my ($line) = @_;

        if ($line =~ m/\((\S+)\/100\%\)/) {
            my $percent = $1;
            my $transferred_h = render_bytes(int($size * $percent / 100), 1);
            my $total_h = render_bytes($size, 1);

            print "transferred $transferred_h of $total_h ($percent%)\n";
        }
    };

    eval { run_command(\@cmd, timeout => undef, outfunc => $print_progress); };
    my $err = $@;
    die "copy failed: $err" if $err;
}
7184
# Determine the image format of $volname on the storage described by $scfg: the file extension
# if the storage has a 'path' and the extension is a known format, 'raw' in all other cases.
sub qemu_img_format {
    my ($scfg, $volname) = @_;

    return "raw" if !$scfg->{path};

    my ($extension) = $volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/;

    return $extension // "raw";
}
7194
# Start a 'drive-mirror' block job for "drive-$drive" of VM $vmid and hand over to
# qemu_drive_mirror_monitor().
#
# $dst_volid is either an 'nbd:' target or a volume ID. The job is registered in the $jobs hash
# before it is started. $bwlimit (KB/s) and $src_bitmap (incremental sync) are optional.
# Dies with "mirroring error: ..." if the job cannot be started; jobs in $jobs are cancelled then.
sub qemu_drive_mirror {
    my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit, $src_bitmap) = @_;

    $jobs ||= {};

    my $device = "drive-$drive";
    $jobs->{$device} = {};

    my ($target, $format);
    if ($dst_volid =~ /^nbd:/) {
        ($target, $format) = ($dst_volid, "nbd");
    } else {
        my $storecfg = PVE::Storage::config();
        my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid);

        my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

        $format = qemu_img_format($dst_scfg, $dst_volname);

        my $dst_path = PVE::Storage::path($storecfg, $dst_volid);

        $target = $is_zero_initialized ? "zeroinit:$dst_path" : $dst_path;
    }

    my %mirror_args = (
        timeout => 10,
        device => $device,
        mode => "existing",
        sync => "full",
        target => $target,
    );
    $mirror_args{format} = $format if $format;

    if (defined($src_bitmap)) {
        $mirror_args{sync} = 'incremental';
        $mirror_args{bitmap} = $src_bitmap;
        print "drive mirror re-using dirty bitmap '$src_bitmap'\n";
    }

    if (defined($bwlimit)) {
        $mirror_args{speed} = $bwlimit * 1024;
        print "drive mirror is starting for $device with bandwidth limit: ${bwlimit} KB/s\n";
    } else {
        print "drive mirror is starting for $device\n";
    }

    # if a job already runs for this device we get an error, catch it for cleanup
    eval { mon_cmd($vmid, "drive-mirror", %mirror_args); };
    if (my $err = $@) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@;
        die "mirroring error: $err\n";
    }

    qemu_drive_mirror_monitor($vmid, $vmiddst, $jobs, $completion, $qga);
}
7246
# Poll the block jobs listed in $jobs (hash reference keyed by job ID) once per second, print
# their progress and finish them according to $completion. Finished jobs are removed from $jobs.
#
# $completion can be either
# 'complete': wait until all jobs are ready, block-job-complete them (default)
# 'cancel': wait until all jobs are ready, block-job-cancel them
# 'skip': wait until all jobs are ready, return with block jobs in ready state
# 'auto': wait until all jobs disappear, only use for jobs which complete automatically
#
# $op is the block job type to look at (default 'mirror'). If $vmiddst is set and differs from
# $vmid, the ready jobs are cancelled while the guest file systems are frozen (if $qga is set and
# the guest agent responds) or the VM is suspended.
#
# On any error the remaining jobs are cancelled and the sub dies with "block job ($op) error: ...".
sub qemu_drive_mirror_monitor {
    my ($vmid, $vmiddst, $jobs, $completion, $qga, $op) = @_;

    $completion //= 'complete';
    $op //= "mirror";

    eval {
        # counts failed completion attempts, used as timeout
        my $err_complete = 0;

        my $starttime = time ();
        while (1) {
            die "block job ('$op') timed out\n" if $err_complete > 300;

            my $stats = mon_cmd($vmid, "query-block-jobs");
            my $ctime = time();

            my $running_jobs = {};
            for my $stat (@$stats) {
                next if $stat->{type} ne $op;
                $running_jobs->{$stat->{device}} = $stat;
            }

            my $readycounter = 0;

            for my $job_id (sort keys %$jobs) {
                my $job = $running_jobs->{$job_id};

                my $vanished = !defined($job);
                my $complete = defined($jobs->{$job_id}->{complete}) && $vanished;
                if($complete || ($vanished && $completion eq 'auto')) {
                    print "$job_id: $op-job finished\n";
                    delete $jobs->{$job_id};
                    next;
                }

                # a job that vanished without us completing it must have been cancelled
                die "$job_id: '$op' has been cancelled\n" if !defined($job);

                my $busy = $job->{busy};
                my $ready = $job->{ready};
                if (my $total = $job->{len}) {
                    my $transferred = $job->{offset} || 0;
                    my $remaining = $total - $transferred;
                    my $percent = sprintf "%.2f", ($transferred * 100 / $total);

                    my $duration = $ctime - $starttime;
                    my $total_h = render_bytes($total, 1);
                    my $transferred_h = render_bytes($transferred, 1);

                    my $status = sprintf(
                        "transferred $transferred_h of $total_h ($percent%%) in %s",
                        render_duration($duration),
                    );

                    if ($ready) {
                        if ($busy) {
                            $status .= ", still busy"; # shouldn't even happen? but mirror is weird
                        } else {
                            $status .= ", ready";
                        }
                    }
                    # only print the status as long as the job was not seen ready before
                    print "$job_id: $status\n" if !$jobs->{$job_id}->{ready};
                    $jobs->{$job_id}->{ready} = $ready;
                }

                $readycounter++ if $job->{ready};
            }

            last if scalar(keys %$jobs) == 0;

            if ($readycounter == scalar(keys %$jobs)) {
                print "all '$op' jobs are ready\n";

                # do the complete later (or has already been done)
                last if $completion eq 'skip' || $completion eq 'auto';

                if ($vmiddst && $vmiddst != $vmid) {
                    my $agent_running = $qga && qga_check_running($vmid);
                    if ($agent_running) {
                        print "freeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-freeze"); };
                    } else {
                        print "suspend vm\n";
                        eval { PVE::QemuServer::vm_suspend($vmid, 1); };
                    }

                    # if we clone a disk for a new target vm, we don't switch the disk
                    PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs);

                    if ($agent_running) {
                        print "unfreeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-thaw"); };
                    } else {
                        print "resume vm\n";
                        eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
                    }

                    last;
                } else {

                    for my $job_id (sort keys %$jobs) {
                        # try to switch the disk if source and destination are on the same guest
                        print "$job_id: Completing block job...\n";

                        # named differently than $op to not shadow the job type from above
                        my $qmp_cmd;
                        if ($completion eq 'complete') {
                            $qmp_cmd = 'block-job-complete';
                        } elsif ($completion eq 'cancel') {
                            $qmp_cmd = 'block-job-cancel';
                        } else {
                            die "invalid completion value: $completion\n";
                        }
                        eval { mon_cmd($vmid, $qmp_cmd, device => $job_id) };
                        my $err = $@;
                        if ($err && $err =~ m/cannot be completed/) {
                            print "$job_id: block job cannot be completed, trying again.\n";
                            $err_complete++;
                        } elsif ($err) {
                            # any other error must not be reported as success
                            die "$job_id: block job cannot be completed - $err\n";
                        } else {
                            print "$job_id: Completed successfully.\n";
                            $jobs->{$job_id}->{complete} = 1;
                        }
                    }
                }
            }
            sleep 1;
        }
    };
    my $err = $@;

    if ($err) {
        # best effort cleanup, the original error is the one that gets reported
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        die "block job ($op) error: $err";
    }
}
7384
# Send 'block-job-cancel' for every job in $jobs (errors of the cancel command are ignored), then
# poll 'query-block-jobs' once per second until none of them is listed anymore. Vanished jobs are
# removed from the $jobs hash.
sub qemu_blockjobs_cancel {
    my ($vmid, $jobs) = @_;

    for my $job_id (keys %$jobs) {
        print "$job_id: Cancelling block job\n";
        eval { mon_cmd($vmid, "block-job-cancel", device => $job_id); };
        $jobs->{$job_id}->{cancel} = 1;
    }

    while (1) {
        my $stats = mon_cmd($vmid, "query-block-jobs");

        my %still_running;
        for my $stat (@$stats) {
            $still_running{$stat->{device}} = $stat;
        }

        for my $job_id (keys %$jobs) {
            next if !defined($jobs->{$job_id}->{cancel});
            next if defined($still_running{$job_id});

            print "$job_id: Done.\n";
            delete $jobs->{$job_id};
        }

        last if scalar(keys %$jobs) == 0;

        sleep 1;
    }
}
7415
# Clone a single drive of VM $vmid for VM $newvmid and return the drive hash for the new volume.
#
# Without $full a linked clone is created. With $full a new volume is allocated (on $storage if
# given, else on the storage of the source volume) and filled depending on the drive:
#  - cloud-init drives get no data copied,
#  - if the VM is not running or a snapshot is cloned, the data is copied offline,
#  - otherwise a drive-mirror block job is started ($jobs, $completion, $qga and $bwlimit are
#    passed on to it).
# Every newly created volume ID is pushed onto @$newvollist.
#
# NOTE: the returned hash is the passed $drive hash itself, modified in place.
sub clone_disk {
    my ($storecfg, $vmid, $running, $drivename, $drive, $snapname,
        $newvmid, $storage, $format, $full, $newvollist, $jobs, $completion, $qga, $bwlimit, $conf) = @_;

    my $newvolid;

    if (!$full) {
        print "create linked clone of drive $drivename ($drive->{file})\n";
        $newvolid = PVE::Storage::vdisk_clone($storecfg, $drive->{file}, $newvmid, $snapname);
        push @$newvollist, $newvolid;
    } else {

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        $storeid = $storage if $storage;

        my $dst_format = resolve_dst_disk_format($storecfg, $storeid, $volname, $format);

        print "create full clone of drive $drivename ($drive->{file})\n";
        my $is_cloudinit = drive_is_cloudinit($drive);
        my $name = undef;
        my $size = undef;
        if ($is_cloudinit) {
            $name = "vm-$newvmid-cloudinit";
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            if ($scfg->{path}) {
                $name .= ".$dst_format";
            }
            $snapname = undef;
            $size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE;
        } elsif ($drivename eq 'efidisk0') {
            $size = get_efivars_size($conf);
        } elsif ($drivename eq 'tpmstate0') {
            $size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
        } else {
            ($size) = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
        }
        $newvolid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $newvmid, $dst_format, $name, ($size/1024)
        );
        push @$newvollist, $newvolid;

        PVE::Storage::activate_volumes($storecfg, [$newvolid]);

        if ($is_cloudinit) {
            # no data is copied for cloud-init drives
            #
            # when cloning multiple disks (e.g. during clone_vm) it might be the last disk
            # if this is the case, we have to complete any block-jobs still there from
            # previous drive-mirrors
            # ($completion is optional, avoid comparing an undefined value)
            if (($completion && $completion eq 'complete') && (scalar(keys %$jobs) > 0)) {
                qemu_drive_mirror_monitor($vmid, $newvmid, $jobs, $completion, $qga);
            }
        } else {
            my $sparseinit = PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $newvolid);
            if (!$running || $snapname) {
                # TODO: handle bwlimits
                if ($drivename eq 'efidisk0') {
                    # the relevant data on the efidisk may be smaller than the source
                    # e.g. on RBD/ZFS, so we use dd to copy only the amount
                    # that is given by the OVMF_VARS.fd
                    my $src_path = PVE::Storage::path($storecfg, $drive->{file});
                    my $dst_path = PVE::Storage::path($storecfg, $newvolid);

                    # better for Ceph if block size is not too small, see bug #3324
                    my $bs = 1024*1024;

                    run_command(['qemu-img', 'dd', '-n', '-O', $dst_format, "bs=$bs", "osize=$size",
                        "if=$src_path", "of=$dst_path"]);
                } else {
                    qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit);
                }
            } else {

                die "cannot move TPM state while VM is running\n" if $drivename eq 'tpmstate0';

                my $kvmver = get_running_qemu_version ($vmid);
                if (!min_version($kvmver, 2, 7)) {
                    die "drive-mirror with iothread requires qemu version 2.7 or higher\n"
                        if $drive->{iothread};
                }

                qemu_drive_mirror($vmid, $drivename, $newvolid, $newvmid, $sparseinit, $jobs,
                    $completion, $qga, $bwlimit);
            }
        }
    }

    # failing to query the size is not fatal, the size property is simply not updated then
    my ($size) = eval { PVE::Storage::volume_size_info($storecfg, $newvolid, 10) };

    my $disk = $drive;
    $disk->{format} = undef;
    $disk->{file} = $newvolid;
    $disk->{size} = $size if defined($size);

    return $disk;
}
7511
# Query the version of the running QEMU process of $vmid and return it as "<major>.<minor>".
sub get_running_qemu_version {
    my ($vmid) = @_;

    my $version_info = mon_cmd($vmid, "query-version");
    my $qemu = $version_info->{qemu};

    return "$qemu->{major}.$qemu->{minor}";
}
7517
# Decide whether the old bios files have to be used for the given machine type.
#
# Returns nothing if $machine_type is empty, else the list ($use_old_bios_files, $machine_type),
# where a trailing '.pxe' is stripped from the returned machine type.
sub qemu_use_old_bios_files {
    my ($machine_type) = @_;

    return if !$machine_type;

    # an explicit '.pxe' suffix always selects the old files
    if ($machine_type =~ m/^(\S+)\.pxe$/) {
        return (1, $1);
    }

    my $version = extract_version($machine_type, kvm_user_version());
    # Note: kvm version < 2.4 use non-efi pxe files, and have problems when we
    # load new efi bios files on migration. So this hack is required to allow
    # live migration from qemu-2.2 to qemu-2.4, which is sometimes used when
    # upgrading from proxmox-ve-3.X to proxmox-ve 4.0
    return (!min_version($version, 2, 4), $machine_type);
}
7539
# Return the size (as reported by -s) of the OVMF vars image that get_ovmf_files() selects for
# the VM config $conf. Dies if that image file does not exist.
sub get_efivars_size {
    my ($conf) = @_;

    my $arch = get_vm_arch($conf);

    my $efidisk;
    $efidisk = parse_drive('efidisk0', $conf->{efidisk0}) if $conf->{efidisk0};

    my $smm = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # only the second returned element (the vars image) is of interest here
    my $ovmf_vars = (get_ovmf_files($arch, $efidisk, $smm))[1];
    die "uefi vars image '$ovmf_vars' not found\n" if ! -f $ovmf_vars;

    return -s $ovmf_vars;
}
7549
# Set the 'size' property of the 'efidisk0' entry in $conf to the size of the EFI vars image.
# Does nothing if no 'efidisk0' is configured. Always returns nothing.
sub update_efidisk_size {
    my ($conf) = @_;

    if (defined($conf->{efidisk0})) {
        my $efidisk = PVE::QemuServer::parse_drive('efidisk0', $conf->{efidisk0});
        $efidisk->{size} = get_efivars_size($conf);
        $conf->{efidisk0} = print_drive($efidisk);
    }

    return;
}
7561
# Set the 'size' property of the 'tpmstate0' entry in $conf to TPMSTATE_DISK_SIZE.
# Does nothing if no 'tpmstate0' is configured (same behavior as update_efidisk_size() for
# 'efidisk0'), so that no bogus entry is generated from an undefined property string.
sub update_tpmstate_size {
    my ($conf) = @_;

    return if !defined($conf->{tpmstate0});

    my $disk = PVE::QemuServer::parse_drive('tpmstate0', $conf->{tpmstate0});
    $disk->{size} = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
    $conf->{tpmstate0} = print_drive($disk);
}
7569
# Allocate a new EFI disk on $storeid for $vmid and initialize it from the OVMF vars image chosen
# by get_ovmf_files(). Returns the list ($volid, $size/1024), with $size as reported by
# volume_size_info(). Dies if the vars image does not exist.
sub create_efidisk($$$$$$$) {
    my ($storecfg, $storeid, $vmid, $fmt, $arch, $efidisk, $smm) = @_;

    my $vars_image = (get_ovmf_files($arch, $efidisk, $smm))[1];
    die "EFI vars default image not found\n" if ! -f $vars_image;

    my $image_bytes = -s $vars_image;
    my $image_kb = PVE::Tools::convert_size($image_bytes, 'b' => 'kb');

    my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $fmt, undef, $image_kb);
    PVE::Storage::activate_volumes($storecfg, [$volid]);

    # copy the default vars into the freshly allocated volume
    qemu_img_convert($vars_image, $volid, $image_bytes, undef, 0);

    my ($size) = PVE::Storage::volume_size_info($storecfg, $volid, 3);

    return ($volid, $size/1024);
}
7586
# Return a hash reference mapping the ID of each iothread of VM $vmid to its 'thread-id', as
# reported by 'query-iothreads'.
sub vm_iothreads_list {
    my ($vmid) = @_;

    my $reply = mon_cmd($vmid, 'query-iothreads');

    my $thread_id_of = {};
    for my $entry (@$reply) {
        my ($id, $thread_id) = ($entry->{id}, $entry->{"thread-id"});
        $thread_id_of->{$id} = $thread_id;
    }

    return $thread_id_of;
}
7599
# Return the list ($maxdev, $controller, $controller_prefix) for a SCSI drive:
#  - $maxdev is 7 for the default/lsi* controllers, 1 for 'virtio-scsi-single', 256 otherwise
#  - $controller is the drive index divided by $maxdev (integer part)
#  - $controller_prefix is 'virtioscsi' for 'virtio-scsi-single' and 'scsihw' otherwise
sub scsihw_infos {
    my ($conf, $drive) = @_;

    my $scsihw = $conf->{scsihw};
    my $is_single = $scsihw && $scsihw eq 'virtio-scsi-single';

    my $maxdev;
    if (!$scsihw || $scsihw =~ m/^lsi/) {
        $maxdev = 7;
    } else {
        $maxdev = $is_single ? 1 : 256;
    }

    my $controller = int($drive->{index} / $maxdev);
    my $controller_prefix = $is_single ? "virtioscsi" : "scsihw";

    return ($maxdev, $controller, $controller_prefix);
}
7620
# Map an ostype to a Windows version number: 5 for wxp/w2k3/w2k, 6 for w2k8/wvista, the number
# from 'win<N>' for those types, and 0 for everything else (including an unset ostype).
sub windows_version {
    my ($ostype) = @_;

    return 0 if !$ostype;

    my %legacy_version = (
        wxp => 5,
        w2k3 => 5,
        w2k => 5,
        w2k8 => 6,
        wvista => 6,
    );

    return $legacy_version{$ostype} if exists($legacy_version{$ostype});
    return $1 if $ostype =~ m/^win(\d+)$/;

    return 0;
}
7638
# Determine the image format to use for a new disk on storage $storeid.
#
# If $format is not set, the format of $src_volname is used as hint; without a source volume name
# the default format of the storage is returned. A format that the storage does not list as valid
# is replaced by the storage's default format.
sub resolve_dst_disk_format {
    my ($storecfg, $storeid, $src_volname, $format) = @_;

    my ($default_format, $valid_formats) =
        PVE::Storage::storage_default_format($storecfg, $storeid);

    if (!$format) {
        return $default_format if !$src_volname;

        # if no target format is specified, use the source disk format as hint
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        $format = qemu_img_format($scfg, $src_volname);
    }

    # test if requested format is supported - else use default
    my $supported = grep { $_ eq $format } @$valid_formats;

    return $supported ? $format : $default_format;
}
7658
# NOTE: if this logic changes, please update docs & possibly gui logic
#
# Select the storage for the VM state, in this order of preference:
#  1. the 'vmstatestorage' from $conf, if set
#  2. a shared storage where the VM has at least one disk
#  3. a local storage where the VM has at least one disk
#  4. the storage named 'local'
# Within 2. and 3., storages with a 'path' (file based) are preferred.
sub find_vmstate_storage {
    my ($conf, $storecfg) = @_;

    return $conf->{vmstatestorage} if $conf->{vmstatestorage};

    my ($shared, $local);

    foreach_storage_used_by_vm($conf, sub {
        my ($sid) = @_;
        my $scfg = PVE::Storage::storage_config($storecfg, $sid);

        # keep the first candidate, but let file based storages override it
        if ($scfg->{shared}) {
            $shared = $sid if !$shared || $scfg->{path};
        } else {
            $local = $sid if !$local || $scfg->{path};
        }
    });

    return $shared // $local // 'local';
}
7682
# Generate a new UUID with the UUID module and return its string representation.
sub generate_uuid {
    my $uuid_bin;
    UUID::generate($uuid_bin);

    my $uuid_text;
    UUID::unparse($uuid_bin, $uuid_text);

    return $uuid_text;
}
7689
# Return a freshly generated UUID in the form "uuid=<UUID>".
sub generate_smbios1_uuid {
    my $uuid = generate_uuid();

    return "uuid=$uuid";
}
7693
# Send 'nbd-server-stop' to VM $vmid and return the result of the monitor command.
sub nbd_stop {
    my ($vmid) = @_;

    return mon_cmd($vmid, 'nbd-server-stop');
}
7699
# Create the (empty) reboot trigger file for $vmid below /run/qemu-server. Dies if the file
# cannot be opened for writing.
sub create_reboot_request {
    my ($vmid) = @_;

    my $trigger_file = "/run/qemu-server/$vmid.reboot";

    open(my $fh, '>', $trigger_file)
        or die "failed to create reboot trigger file: $!\n";
    close($fh);
}
7706
# Remove the reboot trigger file of $vmid. Returns the result of unlink(), i.e. a true value if
# the file was removed. A missing file is not an error; any other failure is fatal.
sub clear_reboot_request {
    my ($vmid) = @_;

    my $trigger_file = "/run/qemu-server/$vmid.reboot";

    my $removed = unlink($trigger_file);
    if (!$removed && $! != POSIX::ENOENT) {
        die "could not remove reboot request for $vmid: $!";
    }

    return $removed;
}
7718
# Build a hash reference mapping device names to boot indices from a legacy boot string (one
# letter per device class, e.g. 'c' disk, 'd' CD-ROM, 'n' network).
#
# The n-th letter gets the base index n*100. CD-ROM drives and network devices are numbered
# consecutively from the base of their class; for disks only the configured 'bootdisk' gets an
# entry. If $bootcfg has no 'legacy' value, the default from $boot_fmt is used.
sub bootorder_from_legacy {
    my ($conf, $bootcfg) = @_;

    my $legacy = $bootcfg->{legacy} || $boot_fmt->{legacy}->{default};

    # next free boot index per device class letter
    my %next_index;
    my $position = 0;
    for my $letter (split(//, $legacy)) {
        $position++;
        $next_index{$letter} = $position * 100;
    }

    my $bootorder = {};

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        if (drive_is_cdrom ($drive, 1)) {
            if ($next_index{d}) {
                $bootorder->{$ds} = $next_index{d};
                $next_index{d} += 1;
            }
        } elsif ($next_index{c}) {
            my $is_bootdisk = $conf->{bootdisk} && $conf->{bootdisk} eq $ds;
            $bootorder->{$ds} = $next_index{c} if $is_bootdisk;
            $next_index{c} += 1;
        }
    });

    if ($next_index{n}) {
        for my $i (0 .. $MAX_NETS - 1) {
            my $netname = "net$i";
            next if !$conf->{$netname};
            $bootorder->{$netname} = $next_index{n};
            $next_index{n} += 1;
        }
    }

    return $bootorder;
}
7758
# Generate default device list for 'boot: order=' property. Matches legacy
# default boot order, but with explicit device names. This is important, since
# the fallback for when neither 'order' nor the old format is specified relies
# on 'bootorder_from_legacy' above, and it would be confusing if this diverges.
#
# Returns an array reference with (in this order, each only if present) the first hard disk, the
# first CD-ROM drive and the first network device of $conf.
sub get_default_bootdevices {
    my ($conf) = @_;

    my @devices;

    # harddisk (0) first, then cdrom (1)
    for my $want_cdrom (0, 1) {
        my $first = PVE::QemuServer::Drive::resolve_first_disk($conf, $want_cdrom);
        push @devices, $first if $first;
    }

    # network
    for my $i (0 .. $MAX_NETS - 1) {
        my $netname = "net$i";
        if ($conf->{$netname}) {
            push @devices, $netname;
            last;
        }
    }

    return \@devices;
}
7786
# Return a hash reference mapping device names to boot indices for the VM config $conf.
#
# Without a 'boot' property, or with a legacy boot string, the result comes from
# bootorder_from_legacy(). With 'order=', the listed devices get consecutive indices from 100.
sub device_bootorder {
    my ($conf) = @_;

    return bootorder_from_legacy($conf) if !defined($conf->{boot});

    my $boot = parse_property_string($boot_fmt, $conf->{boot});

    return bootorder_from_legacy($conf, $boot) if !defined($boot) || $boot->{legacy};

    my $bootorder = {};
    if ($boot->{order}) {
        my $index = 100; # start at 100 to allow user to insert devices before us with -args
        for my $dev (PVE::Tools::split_list($boot->{order})) {
            $bootorder->{$dev} = $index;
            $index++;
        }
    }

    return $bootorder;
}
7806
# Connect to the qmeventd socket and send the vzdump handshake for $vmid. Returns the socket
# handle, which the caller has to close once the inhibit is no longer required.
#
# Connecting is retried (25ms pause) on EINTR/EAGAIN; dies on any other error or after the fifth
# failed attempt.
sub register_qmeventd_handle {
    my ($vmid) = @_;

    my $peer = "/var/run/qmeventd.sock";

    my $fh;
    my $attempts = 0;
    until ($fh = IO::Socket::UNIX->new(Peer => $peer, Blocking => 0, Timeout => 1)) {
        $attempts++;
        if ($! != EINTR && $! != EAGAIN) {
            die "unable to connect to qmeventd socket (vmid: $vmid) - $!\n";
        }
        if ($attempts > 4) {
            die "unable to connect to qmeventd socket (vmid: $vmid) - timeout "
                . "after $attempts retries\n";
        }
        usleep(25000);
    }

    # send handshake to mark VM as backing up
    print $fh to_json({vzdump => {vmid => "$vmid"}});

    # return handle to be closed later when inhibit is no longer required
    return $fh;
}
7834
7835 # bash completion helper
7836
# Completion helper: return an array reference with the volume IDs of all 'vma' backup archives.
# If $cvalue starts with '<storage>:', only that storage is queried.
sub complete_backup_archives {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    # stays undefined if no storage prefix was typed yet
    my ($storeid) = $cvalue =~ m/^([^:]+):/;

    my $data = PVE::Storage::template_list($cfg, $storeid, 'backup');

    my @volids;
    for my $items (values %$data) {
        for my $item (@$items) {
            next if $item->{format} !~ m/^vma\.(${\PVE::Storage::Plugin::COMPRESSOR_RE})$/;
            next if !defined($item->{volid});
            push @volids, $item->{volid};
        }
    }

    return \@volids;
}
7860
# Return an array reference with the IDs of all VMs reported by vmstatus().
# If $running is defined, templates are left out and only running ($running true) or only
# not running ($running false) VMs are returned.
my $complete_vmid_full = sub {
    my ($running) = @_;

    my $idlist = vmstatus();

    my @vmids;
    for my $id (keys %$idlist) {
        my $status = $idlist->{$id};

        if (defined($running)) {
            next if $status->{template};

            my $is_running = $status->{status} eq 'running';
            next if $running ? !$is_running : $is_running;
        }

        push @vmids, $id;
    }

    return \@vmids;
};
7880
# Completion helper: IDs of all VMs.
sub complete_vmid {
    return $complete_vmid_full->();
}
7884
# Completion helper: IDs of all non-template VMs that are not running.
sub complete_vmid_stopped {
    return $complete_vmid_full->(0);
}
7888
# Completion helper: IDs of all non-template VMs that are running.
sub complete_vmid_running {
    return $complete_vmid_full->(1);
}
7892
# Completion helper: return an array reference with the IDs of all storages that pass
# storage_check_enabled() and have the 'images' content type configured.
sub complete_storage {

    my $cfg = PVE::Storage::config();
    my $storages = $cfg->{ids};

    my @usable;
    for my $sid (keys %$storages) {
        my $enabled = PVE::Storage::storage_check_enabled($cfg, $sid, undef, 1);
        if ($enabled && $storages->{$sid}->{content}->{images}) {
            push @usable, $sid;
        }
    }

    return \@usable;
}
7907
# Completion helper: return an array reference with the IDs of all storages that pass
# storage_check_enabled() for the target node and have the 'images' content type configured.
#
# The target node is taken from the second element of $all_args.
sub complete_migration_storage {
    my ($cmd, $param, $current_value, $all_args) = @_;

    # plain element access instead of a one-element array slice
    my $targetnode = $all_args->[1];

    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my $res = [];
    foreach my $sid (keys %$ids) {
        next if !PVE::Storage::storage_check_enabled($cfg, $sid, $targetnode, 1);
        next if !$ids->{$sid}->{content}->{images};
        push @$res, $sid;
    }

    return $res;
}
7925
# Returns true if 'query-status' reports the status 'paused' for VM $vmid. If the config check or
# the monitor command fails, the error is only printed as warning and a false value is returned.
sub vm_is_paused {
    my ($vmid) = @_;

    my $qmpstatus;
    eval {
        PVE::QemuConfig::assert_config_exists_on_node($vmid);
        $qmpstatus = mon_cmd($vmid, "query-status");
    };
    warn "$@\n" if $@;

    return $qmpstatus && $qmpstatus->{status} eq "paused";
}
7935
# Verify that the storage of volume $vol is configured for the content type of that volume.
# Returns 1 on success and dies otherwise.
sub check_volume_storage_type {
    my ($storecfg, $vol) = @_;

    my ($storeid) = PVE::Storage::parse_volume_id($vol);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
    my ($vtype) = PVE::Storage::parse_volname($storecfg, $vol);

    if (!$scfg->{content}->{$vtype}) {
        die "storage '$storeid' does not support content-type '$vtype'\n";
    }

    return 1;
}
7948
7949 1;