]> git.proxmox.com Git - qemu-server.git/blob - PVE/QemuServer.pm
14a17af0adb5013425aa0333dc8711a2bca5bdcf
[qemu-server.git] / PVE / QemuServer.pm
1 package PVE::QemuServer;
2
3 use strict;
4 use warnings;
5
6 use Cwd 'abs_path';
7 use Digest::SHA;
8 use Fcntl ':flock';
9 use Fcntl;
10 use File::Basename;
11 use File::Copy qw(copy);
12 use File::Path;
13 use File::stat;
14 use Getopt::Long;
15 use IO::Dir;
16 use IO::File;
17 use IO::Handle;
18 use IO::Select;
19 use IO::Socket::UNIX;
20 use IPC::Open3;
21 use JSON;
22 use MIME::Base64;
23 use POSIX;
24 use Storable qw(dclone);
25 use Time::HiRes qw(gettimeofday usleep);
26 use URI::Escape;
27 use UUID;
28
29 use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file);
30 use PVE::CGroup;
31 use PVE::DataCenterConfig;
32 use PVE::Exception qw(raise raise_param_exc);
33 use PVE::Format qw(render_duration render_bytes);
34 use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
35 use PVE::INotify;
36 use PVE::JSONSchema qw(get_standard_option parse_property_string);
37 use PVE::ProcFSTools;
38 use PVE::PBSClient;
39 use PVE::RPCEnvironment;
40 use PVE::Storage;
41 use PVE::SysFSTools;
42 use PVE::Systemd;
43 use PVE::Tools qw(run_command file_read_firstline file_get_contents dir_glob_foreach get_host_arch $IPV6RE);
44
45 use PVE::QMPClient;
46 use PVE::QemuConfig;
47 use PVE::QemuServer::Helpers qw(min_version config_aware_timeout);
48 use PVE::QemuServer::Cloudinit;
49 use PVE::QemuServer::CGroup;
50 use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options);
51 use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom drive_is_read_only parse_drive print_drive);
52 use PVE::QemuServer::Machine;
53 use PVE::QemuServer::Memory;
54 use PVE::QemuServer::Monitor qw(mon_cmd);
55 use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
56 use PVE::QemuServer::USB qw(parse_usb_device);
57
# Optional SDN support: $have_sdn is 1 when PVE::Network::SDN::Zones could be loaded and
# stays undefined otherwise (the load error is deliberately ignored).
my $have_sdn = eval {
    require PVE::Network::SDN::Zones;
    1;
};

# EDK2 firmware images per guest architecture. Every entry is a two element list: the CODE
# image followed by the matching VARS image.
# NOTE: the base path already ends in '/', so the resulting paths contain '//'.
my $EDK2_FW_BASE = '/usr/share/pve-edk2-firmware/';
my $OVMF = do {
    my $fw_pair = sub { return [ map { "$EDK2_FW_BASE/$_" } @_ ] };
    +{
        x86_64 => {
            '4m' => $fw_pair->('OVMF_CODE_4M.secboot.fd', 'OVMF_VARS_4M.fd'),
            '4m-ms' => $fw_pair->('OVMF_CODE_4M.secboot.fd', 'OVMF_VARS_4M.ms.fd'),
            default => $fw_pair->('OVMF_CODE.fd', 'OVMF_VARS.fd'),
        },
        aarch64 => {
            default => $fw_pair->('AAVMF_CODE.fd', 'AAVMF_VARS.fd'),
        },
    };
};

# Host CPU information, read once when the module is loaded.
my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
89
# Note about locking: we use flock on the config file to protect against concurrent actions.
# Additionally, we have a 'lock' setting in the config file. This can be set to 'migrate',
# 'backup', 'snapshot' or 'rollback'. Most actions are not allowed when such a lock is set.
# But you can ignore this kind of lock with the --skiplock flag.
94
# Register the parser and writer callbacks for '/qemu-server/' with PVE::Cluster.
cfs_register_file(
    '/qemu-server/',
    \&parse_vm_config,
    \&write_vm_config,
);

# Standard option: location used by commands that save or restore VM state.
PVE::JSONSchema::register_standard_option(
    'pve-qm-stateuri',
    {
        type => 'string',
        description => "Some command save/restore state from this location.",
        optional => 1,
        maxLength => 128,
    },
);

# Standard option: machine type string, e.g. 'pc', 'q35', versioned variants and 'virt'.
PVE::JSONSchema::register_standard_option(
    'pve-qemu-machine',
    {
        type => 'string',
        description => "Specifies the Qemu machine type.",
        optional => 1,
        maxLength => 40,
        pattern => '(pc|pc(-i440fx)?-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|q35|pc-q35-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|virt(?:-\d+(\.\d+)+)?(\+pve\d+)?)',
    },
);
113
114
# Translate a source storage ID into its target storage ID.
#
# $map    - optional hash with an 'entries' hash (source => target) and/or a 'default' target
# $source - storage ID to translate
#
# Returns the explicitly mapped target if one is defined, else the map's default if it is
# set, else $source itself. Without a map, $source is returned unchanged.
sub map_storage {
    my ($map, $source) = @_;

    return $source unless defined($map);

    if (my $entries = $map->{entries}) {
        my $target = $entries->{$source};
        return $target if defined($target);
    }

    # fall back to the default target, or to identity when there is none
    return $map->{default} || $source;
}
128
# Standard option describing a source => target storage mapping.
PVE::JSONSchema::register_standard_option(
    'pve-targetstorage',
    {
        type => 'string',
        format => 'storagepair-list',
        optional => 1,
        description => "Mapping from source to target storages. Providing only a single storage ID maps all source storages to that storage. Providing the special value '1' will map each source storage to itself.",
    },
);
135
136 #no warnings 'redefine';
137
# Result of PVE::INotify::nodename(), kept after the first defined answer.
my $nodename_cache;

# Return the node name as reported by PVE::INotify::nodename(). The lookup is repeated only
# while no defined value has been cached yet.
sub nodename {
    return $nodename_cache if defined($nodename_cache);
    return $nodename_cache = PVE::INotify::nodename();
}
143
# Property string schema for the 'watchdog' option, registered as 'pve-qm-watchdog'.
my $watchdog_fmt = {
    model => {
        type => 'string',
        description => "Watchdog type to emulate.",
        enum => ['i6300esb', 'ib700'],
        default => 'i6300esb',
        default_key => 1,
        optional => 1,
    },
    action => {
        type => 'string',
        description => "The action to perform if after activation the guest fails to poll the watchdog in time.",
        enum => ['reset', 'shutdown', 'poweroff', 'pause', 'debug', 'none'],
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-qm-watchdog', $watchdog_fmt);
161
# Property string schema for the 'agent' option (QEMU guest agent settings).
my $agent_fmt = {
    enabled => {
        type => 'boolean',
        description => "Enable/disable communication with a Qemu Guest Agent (QGA) running in the VM.",
        default_key => 1,
        default => 0,
    },
    fstrim_cloned_disks => {
        type => 'boolean',
        description => "Run fstrim after moving a disk or migrating the VM.",
        default => 0,
        optional => 1,
    },
    type => {
        type => 'string',
        description => "Select the agent type",
        enum => ['virtio', 'isa'],
        default => 'virtio',
        optional => 1,
    },
};
183
# Property string schema for the 'vga' option: display type plus optional memory size.
my $vga_fmt = {
    type => {
        type => 'string',
        description => "Select the VGA type.",
        enum => [
            'cirrus', 'qxl', 'qxl2', 'qxl3', 'qxl4', 'none',
            'serial0', 'serial1', 'serial2', 'serial3',
            'std', 'virtio', 'vmware',
        ],
        default => 'std',
        default_key => 1,
        optional => 1,
    },
    memory => {
        type => 'integer',
        description => "Sets the VGA memory (in MiB). Has no effect with serial display.",
        minimum => 4,
        maximum => 512,
        optional => 1,
    },
};
201
# Property string schema for the 'ivshmem' option (inter-VM shared memory).
my $ivshmem_fmt = {
    size => {
        type => 'integer',
        description => "The size of the file in MB.",
        minimum => 1,
    },
    name => {
        type => 'string',
        description => "The name of the file. Will be prefixed with 'pve-shm-'. Default is the VMID. Will be deleted when the VM is stopped.",
        format_description => 'string',
        pattern => '[a-zA-Z0-9\-]+',
        optional => 1,
    },
};
216
# Property string schema for the 'audio0' option: emulated device and backend driver.
my $audio_fmt = {
    device => {
        type => 'string',
        description => "Configure an audio device.",
        enum => ['ich9-intel-hda', 'intel-hda', 'AC97'],
    },
    driver => {
        type => 'string',
        description => "Driver backend for the audio device.",
        enum => [qw(spice none)],
        default => 'spice',
        optional => 1,
    },
};
231
# Property string schema for the 'spice_enhancements' option.
my $spice_enhancements_fmt = {
    foldersharing => {
        type => 'boolean',
        description => "Enable folder sharing via SPICE. Needs Spice-WebDAV daemon installed in the VM.",
        default => '0',
        optional => 1,
    },
    videostreaming => {
        type => 'string',
        description => "Enable video streaming. Uses compression for detected video streams.",
        enum => [qw(off all filter)],
        default => 'off',
        optional => 1,
    },
};
247
# Property string schema for the 'rng0' option: entropy source on the host plus an optional
# rate limit expressed as 'max_bytes' per 'period' milliseconds.
my $rng_fmt = {
    source => {
        type => 'string',
        description => "The file on the host to gather entropy from. In most cases '/dev/urandom'"
            ." should be preferred over '/dev/random' to avoid entropy-starvation issues on the"
            ." host. Using urandom does *not* decrease security in any meaningful way, as it's"
            ." still seeded from real entropy, and the bytes provided will most likely be mixed"
            ." with real entropy on the guest as well. '/dev/hwrng' can be used to pass through"
            ." a hardware RNG from the host.",
        enum => [qw(/dev/urandom /dev/random /dev/hwrng)],
        default_key => 1,
    },
    max_bytes => {
        type => 'integer',
        description => "Maximum bytes of entropy allowed to get injected into the guest every"
            ." 'period' milliseconds. Prefer a lower value when using '/dev/random' as source. Use"
            ." `0` to disable limiting (potentially dangerous!).",
        # With the default period of 1000 ms this is 1 KiB/s: enough entropy for the guest to
        # avoid boot-starvation issues (e.g. systemd etc...) without any chance of overwhelming
        # the host, provided we're reading from /dev/urandom.
        default => 1024,
        optional => 1,
    },
    period => {
        type => 'integer',
        description => "Every 'period' milliseconds the entropy-injection quota is reset, allowing"
            ." the guest to retrieve another 'max_bytes' of entropy.",
        default => 1000,
        optional => 1,
    },
};
280
# Schema of all regular VM configuration options, keyed by option name. Entries are plain
# JSON schema property definitions; options holding a property string reference their
# sub-schema through 'format'. Indexed options (netN, numaN, ...) and the cloud-init options
# are added to this hash further down in the file.
my $confdesc = {
    onboot => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether a VM will be started during system bootup.",
        default => 0,
    },
    autostart => {
        optional => 1,
        type => 'boolean',
        description => "Automatic restart after crash (currently ignored).",
        default => 0,
    },
    hotplug => {
        optional => 1,
        type => 'string', format => 'pve-hotplug-features',
        description => "Selectively enable hotplug features. This is a comma separated list of"
            ." hotplug features: 'network', 'disk', 'cpu', 'memory' and 'usb'. Use '0' to disable"
            ." hotplug completely. Using '1' as value is an alias for the default `network,disk,usb`.",
        default => 'network,disk,usb',
    },
    reboot => {
        optional => 1,
        type => 'boolean',
        description => "Allow reboot. If set to '0' the VM exits on reboot.",
        default => 1,
    },
    lock => {
        optional => 1,
        type => 'string',
        description => "Lock/unlock the VM.",
        enum => [qw(backup clone create migrate rollback snapshot snapshot-delete suspending suspended)],
    },
    cpulimit => {
        optional => 1,
        type => 'number',
        description => "Limit of CPU usage.",
        verbose_description => "Limit of CPU usage.\n\nNOTE: If the computer has 2 CPUs, it has"
            ." total of '2' CPU time. Value '0' indicates no CPU limit.",
        minimum => 0,
        maximum => 128,
        default => 0,
    },
    cpuunits => {
        optional => 1,
        type => 'integer',
        description => "CPU weight for a VM, will be clamped to [1, 10000] in cgroup v2.",
        verbose_description => "CPU weight for a VM. Argument is used in the kernel fair scheduler."
            ." The larger the number is, the more CPU time this VM gets. Number is relative to"
            ." weights of all the other running VMs.",
        minimum => 2,
        maximum => 262144,
        default => 'cgroup v1: 1024, cgroup v2: 100',
    },
    memory => {
        optional => 1,
        type => 'integer',
        description => "Amount of RAM for the VM in MB. This is the maximum available memory when"
            ." you use the balloon device.",
        minimum => 16,
        default => 512,
    },
    balloon => {
        optional => 1,
        type => 'integer',
        description => "Amount of target RAM for the VM in MB. Using zero disables the balloon driver.",
        minimum => 0,
    },
    shares => {
        optional => 1,
        type => 'integer',
        description => "Amount of memory shares for auto-ballooning. The larger the number is, the"
            ." more memory this VM gets. Number is relative to weights of all other running VMs."
            ." Using zero disables auto-ballooning. Auto-ballooning is done by pvestatd.",
        minimum => 0,
        maximum => 50000,
        default => 1000,
    },
    keyboard => {
        optional => 1,
        type => 'string',
        description => "Keyboard layout for VNC server. The default is read from the"
            ." '/etc/pve/datacenter.cfg' configuration file. It should not be necessary to set it.",
        enum => PVE::Tools::kvmkeymaplist(),
        default => undef,
    },
    name => {
        optional => 1,
        type => 'string', format => 'dns-name',
        description => "Set a name for the VM. Only used on the configuration web interface.",
    },
    scsihw => {
        optional => 1,
        type => 'string',
        description => "SCSI controller model",
        enum => [qw(lsi lsi53c810 virtio-scsi-pci virtio-scsi-single megasas pvscsi)],
        default => 'lsi',
    },
    description => {
        optional => 1,
        type => 'string',
        description => "Description for the VM. Shown in the web-interface VM's summary."
            ." This is saved as comment inside the configuration file.",
        maxLength => 1024 * 8,
    },
    ostype => {
        optional => 1,
        type => 'string',
        enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 win10 win11 l24 l26 solaris)],
        description => "Specify guest operating system.",
        verbose_description => <<EODESC,
Specify guest operating system. This is used to enable special
optimization/features for specific operating systems:

[horizontal]
other;; unspecified OS
wxp;; Microsoft Windows XP
w2k;; Microsoft Windows 2000
w2k3;; Microsoft Windows 2003
w2k8;; Microsoft Windows 2008
wvista;; Microsoft Windows Vista
win7;; Microsoft Windows 7
win8;; Microsoft Windows 8/2012/2012r2
win10;; Microsoft Windows 10/2016/2019
win11;; Microsoft Windows 11/2022
l24;; Linux 2.4 Kernel
l26;; Linux 2.6 - 5.X Kernel
solaris;; Solaris/OpenSolaris/OpenIndiana kernel
EODESC
    },
    boot => {
        optional => 1,
        type => 'string', format => 'pve-qm-boot',
        description => "Specify guest boot order. Use the 'order=' sub-property as usage with no"
            ." key or 'legacy=' is deprecated.",
    },
    bootdisk => {
        optional => 1,
        type => 'string', format => 'pve-qm-bootdisk',
        description => "Enable booting from specified disk. Deprecated: Use 'boot: order=foo;bar' instead.",
        pattern => '(ide|sata|scsi|virtio)\d+',
    },
    smp => {
        optional => 1,
        type => 'integer',
        description => "The number of CPUs. Please use option -sockets instead.",
        minimum => 1,
        default => 1,
    },
    sockets => {
        optional => 1,
        type => 'integer',
        description => "The number of CPU sockets.",
        minimum => 1,
        default => 1,
    },
    cores => {
        optional => 1,
        type => 'integer',
        description => "The number of cores per socket.",
        minimum => 1,
        default => 1,
    },
    numa => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable NUMA.",
        default => 0,
    },
    hugepages => {
        optional => 1,
        type => 'string',
        description => "Enable/disable hugepages memory.",
        enum => [qw(any 2 1024)],
    },
    keephugepages => {
        optional => 1,
        type => 'boolean',
        default => 0,
        description => "Use together with hugepages. If enabled, hugepages will not be deleted"
            ." after VM shutdown and can be used for subsequent starts.",
    },
    vcpus => {
        optional => 1,
        type => 'integer',
        description => "Number of hotplugged vcpus.",
        minimum => 1,
        default => 0,
    },
    acpi => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable ACPI.",
        default => 1,
    },
    agent => {
        optional => 1,
        description => "Enable/disable communication with the Qemu Guest Agent and its properties.",
        type => 'string',
        format => $agent_fmt,
    },
    kvm => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable KVM hardware virtualization.",
        default => 1,
    },
    tdf => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable time drift fix.",
        default => 0,
    },
    localtime => {
        optional => 1,
        type => 'boolean',
        description => "Set the real time clock (RTC) to local time. This is enabled by default if"
            ." the `ostype` indicates a Microsoft Windows OS.",
    },
    freeze => {
        optional => 1,
        type => 'boolean',
        description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
    },
    vga => {
        optional => 1,
        type => 'string', format => $vga_fmt,
        description => "Configure the VGA hardware.",
        verbose_description => "Configure the VGA Hardware. If you want to use high resolution"
            ." modes (>= 1280x1024x16) you may need to increase the vga memory option. Since QEMU"
            ." 2.9 the default VGA display type is 'std' for all OS types besides some Windows"
            ." versions (XP and older) which use 'cirrus'. The 'qxl' option enables the SPICE"
            ." display server. For win* OS you can select how many independent displays you want,"
            ." Linux guests can add displays themselves.\nYou can also run without any graphic card,"
            ." using a serial device as terminal.",
    },
    watchdog => {
        optional => 1,
        type => 'string', format => 'pve-qm-watchdog',
        description => "Create a virtual hardware watchdog device.",
        verbose_description => "Create a virtual hardware watchdog device. Once enabled (by a guest"
            ." action), the watchdog must be periodically polled by an agent inside the guest or"
            ." else the watchdog will reset the guest (or execute the respective action specified)",
    },
    startdate => {
        optional => 1,
        type => 'string',
        typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
        description => "Set the initial date of the real time clock. Valid format for date are:"
            ." 'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
        pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
        default => 'now',
    },
    startup => get_standard_option('pve-startup-order'),
    template => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable Template.",
        default => 0,
    },
    args => {
        optional => 1,
        type => 'string',
        description => "Arbitrary arguments passed to kvm.",
        verbose_description => <<EODESCR,
Arbitrary arguments passed to kvm, for example:

args: -no-reboot -no-hpet

NOTE: this option is for experts only.
EODESCR
    },
    tablet => {
        optional => 1,
        type => 'boolean',
        default => 1,
        description => "Enable/disable the USB tablet device.",
        verbose_description => "Enable/disable the USB tablet device. This device is usually needed"
            ." to allow absolute mouse positioning with VNC. Else the mouse runs out of sync with"
            ." normal VNC clients. If you're running lots of console-only guests on one host, you"
            ." may consider disabling this to save some context switches. This is turned off by"
            ." default if you use spice (`qm set <vmid> --vga qxl`).",
    },
    migrate_speed => {
        optional => 1,
        type => 'integer',
        description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
        minimum => 0,
        default => 0,
    },
    migrate_downtime => {
        optional => 1,
        type => 'number',
        description => "Set maximum tolerated downtime (in seconds) for migrations.",
        minimum => 0,
        default => 0.1,
    },
    cdrom => {
        optional => 1,
        type => 'string', format => 'pve-qm-ide',
        typetext => '<volume>',
        description => "This is an alias for option -ide2",
    },
    cpu => {
        optional => 1,
        description => "Emulated CPU type.",
        type => 'string',
        format => 'pve-vm-cpu-conf',
    },
    parent => get_standard_option('pve-snapshot-name', {
        optional => 1,
        description => "Parent snapshot name. This is used internally, and should not be modified.",
    }),
    snaptime => {
        optional => 1,
        description => "Timestamp for snapshots.",
        type => 'integer',
        minimum => 0,
    },
    vmstate => {
        optional => 1,
        type => 'string', format => 'pve-volume-id',
        description => "Reference to a volume which stores the VM state. This is used internally"
            ." for snapshots.",
    },
    vmstatestorage => get_standard_option('pve-storage-id', {
        description => "Default storage for VM state volumes/files.",
        optional => 1,
    }),
    runningmachine => get_standard_option('pve-qemu-machine', {
        description => "Specifies the QEMU machine type of the running vm. This is used internally"
            ." for snapshots.",
    }),
    runningcpu => {
        description => "Specifies the QEMU '-cpu' parameter of the running vm. This is used"
            ." internally for snapshots.",
        optional => 1,
        type => 'string',
        pattern => $PVE::QemuServer::CPUConfig::qemu_cmdline_cpu_re,
        format_description => 'QEMU -cpu parameter'
    },
    machine => get_standard_option('pve-qemu-machine'),
    arch => {
        description => "Virtual processor architecture. Defaults to the host.",
        optional => 1,
        type => 'string',
        enum => [qw(x86_64 aarch64)],
    },
    smbios1 => {
        description => "Specify SMBIOS type 1 fields.",
        type => 'string', format => 'pve-qm-smbios1',
        maxLength => 512,
        optional => 1,
    },
    protection => {
        optional => 1,
        type => 'boolean',
        description => "Sets the protection flag of the VM. This will disable the remove VM and"
            ." remove disk operations.",
        default => 0,
    },
    bios => {
        optional => 1,
        type => 'string',
        enum => [ qw(seabios ovmf) ],
        description => "Select BIOS implementation.",
        default => 'seabios',
    },
    vmgenid => {
        type => 'string',
        pattern => '(?:[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}|[01])',
        format_description => 'UUID',
        description => "Set VM Generation ID. Use '1' to autogenerate on create or update, pass '0'"
            ." to disable explicitly.",
        verbose_description => "The VM generation ID (vmgenid) device exposes a 128-bit integer"
            ." value identifier to the guest OS. This allows to notify the guest operating system"
            ." when the virtual machine is executed with a different configuration (e.g. snapshot"
            ." execution or creation from a template). The guest operating system notices the"
            ." change, and is then able to react as appropriate by marking its copies of"
            ." distributed databases as dirty, re-initializing its random number generator, etc.\n"
            ."Note that auto-creation only works when done through API/CLI create or update methods"
            .", but not when manually editing the config file.",
        default => "1 (autogenerated)",
        optional => 1,
    },
    hookscript => {
        type => 'string',
        format => 'pve-volume-id',
        optional => 1,
        description => "Script that will be executed during various steps in the vms lifetime.",
    },
    ivshmem => {
        type => 'string',
        format => $ivshmem_fmt,
        description => "Inter-VM shared memory. Useful for direct communication between VMs, or to"
            ." the host.",
        optional => 1,
    },
    audio0 => {
        type => 'string',
        format => $audio_fmt,
        description => "Configure an audio device, useful in combination with QXL/Spice.",
        optional => 1
    },
    spice_enhancements => {
        type => 'string',
        format => $spice_enhancements_fmt,
        description => "Configure additional enhancements for SPICE.",
        optional => 1
    },
    tags => {
        type => 'string', format => 'pve-tag-list',
        description => 'Tags of the VM. This is only meta information.',
        optional => 1,
    },
    rng0 => {
        type => 'string',
        format => $rng_fmt,
        description => "Configure a VirtIO-based Random Number Generator.",
        optional => 1,
    },
};
703
# Property string schema for the 'cicustom' option, registered as 'pve-qm-cicustom'. Each key
# names a volume holding a custom file that is passed to the VM via cloud-init.
my $cicustom_fmt = {
    meta => {
        type => 'string',
        optional => 1,
        # every part of the concatenation must use the same quote character, otherwise the
        # continuation operator and quotes end up inside the description text
        description => 'Specify a custom file containing all meta data passed to the VM via'
            .' cloud-init. This is provider specific meaning configdrive2 and nocloud differ.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    network => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all network data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    user => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all user data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
};
PVE::JSONSchema::register_format('pve-qm-cicustom', $cicustom_fmt);
731
# Schema of the cloud-init related VM configuration options, keyed by option name.
# NOTE: multi-line descriptions are concatenations; every part must open and close with the
# same quote character, otherwise the quotes and dots become part of the description text.
my $confdesc_cloudinit = {
    citype => {
        optional => 1,
        type => 'string',
        description => 'Specifies the cloud-init configuration format. The default depends on the'
            .' configured operating system type (`ostype`). We use the `nocloud` format for Linux,'
            .' and `configdrive2` for windows.',
        enum => ['configdrive2', 'nocloud', 'opennebula'],
    },
    ciuser => {
        optional => 1,
        type => 'string',
        description => "cloud-init: User name to change ssh keys and password for instead of the"
            ." image's configured default user.",
    },
    cipassword => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Password to assign the user. Using this is generally not'
            .' recommended. Use ssh keys instead. Also note that older cloud-init versions do not'
            .' support hashed passwords.',
    },
    cicustom => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Specify custom files to replace the automatically generated'
            .' ones at start.',
        format => 'pve-qm-cicustom',
    },
    searchdomain => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Sets DNS search domains for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    nameserver => {
        optional => 1,
        type => 'string', format => 'address-list',
        description => 'cloud-init: Sets DNS server IP address for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    sshkeys => {
        optional => 1,
        type => 'string',
        format => 'urlencoded',
        description => "cloud-init: Setup public SSH keys (one key per line, OpenSSH format).",
    },
};
782
783 # what about other qemu settings ?
784 #cpu => 'string',
785 #machine => 'string',
786 #fda => 'file',
787 #fdb => 'file',
788 #mtdblock => 'file',
789 #sd => 'file',
790 #pflash => 'file',
791 #snapshot => 'bool',
792 #bootp => 'file',
793 ##tftp => 'dir',
794 ##smb => 'dir',
795 #kernel => 'file',
796 #append => 'string',
797 #initrd => 'file',
798 ##soundhw => 'string',
799
# Make every option defined so far available as the standard option 'pve-qm-<name>'.
for my $opt (keys %$confdesc) {
    PVE::JSONSchema::register_standard_option("pve-qm-$opt", $confdesc->{$opt});
}
803
# Upper bounds for the indexed configuration options.
my $MAX_USB_DEVICES = 5;
my $MAX_NETS = 32;
my $MAX_SERIAL_PORTS = 4;
my $MAX_PARALLEL_PORTS = 3;
my $MAX_NUMA = 8;

# Property string schema for the 'numaN' options, registered as 'pve-qm-numanode'.
my $numa_fmt = {
    cpus => {
        type => "string",
        description => "CPUs accessing this NUMA node.",
        format_description => "id[-id];...",
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
    },
    memory => {
        type => "number",
        description => "Amount of memory this NUMA node provides.",
        optional => 1,
    },
    hostnodes => {
        type => "string",
        description => "Host NUMA nodes to use.",
        format_description => "id[-id];...",
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        optional => 1,
    },
    policy => {
        type => 'string',
        description => "NUMA allocation policy.",
        enum => ['preferred', 'bind', 'interleave'],
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-qm-numanode', $numa_fmt);

my $numadesc = {
    type => 'string',
    format => $numa_fmt,
    description => "NUMA topology.",
    optional => 1,
};
PVE::JSONSchema::register_standard_option("pve-qm-numanode", $numadesc);

# numa0 .. numa<$MAX_NUMA - 1> all share the same description
for my $i (0 .. $MAX_NUMA - 1) {
    $confdesc->{"numa$i"} = $numadesc;
}
847
# NIC models accepted for the 'model' key of a network device.
my $nic_model_list = [
    'e1000',
    'e1000-82540em',
    'e1000-82544gc',
    'e1000-82545em',
    'e1000e',
    'i82551',
    'i82557b',
    'i82559er',
    'ne2k_isa',
    'ne2k_pci',
    'pcnet',
    'rtl8139',
    'virtio',
    'vmxnet3',
];
my $nic_model_list_txt = join(' ', sort @$nic_model_list);

my $net_fmt_bridge_descr = <<__EOD__;
Bridge to attach the network device to. The Proxmox VE standard bridge
is called 'vmbr0'.

If you do not specify a bridge, we create a kvm user (NATed) network
device, which provides DHCP and DNS services. The following addresses
are used:

10.0.2.2 Gateway
10.0.2.3 DNS Server
10.0.2.4 SMB Server

The DHCP server assign addresses to the guest starting from 10.0.2.15.
__EOD__

# Property string schema for the 'netN' options.
my $net_fmt = {
    macaddr => get_standard_option('mac-addr', {
        description => "MAC address. That address must be unique within your network. This is"
            ." automatically generated if not specified.",
    }),
    model => {
        type => 'string',
        description => "Network Card Model. The 'virtio' model provides the best performance with"
            ." very low CPU overhead. If your guest does not support this driver, it is usually"
            ." best to use 'e1000'.",
        enum => $nic_model_list,
        default_key => 1,
    },
    # one alias entry per model name, each pointing at the 'model' and 'macaddr' keys
    (map { $_ => { keyAlias => 'model', alias => 'macaddr' }} @$nic_model_list),
    bridge => {
        type => 'string',
        description => $net_fmt_bridge_descr,
        format_description => 'bridge',
        pattern => '[-_.\w\d]+',
        optional => 1,
    },
    queues => {
        type => 'integer',
        minimum => 0, maximum => 16,
        description => 'Number of packet queues to be used on the device.',
        optional => 1,
    },
    rate => {
        type => 'number',
        minimum => 0,
        description => "Rate limit in mbps (megabytes per second) as floating point number.",
        optional => 1,
    },
    tag => {
        type => 'integer',
        minimum => 1, maximum => 4094,
        description => 'VLAN tag to apply to packets on this interface.',
        optional => 1,
    },
    trunks => {
        type => 'string',
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        description => 'VLAN trunks to pass through this interface.',
        format_description => 'vlanid[;vlanid...]',
        optional => 1,
    },
    firewall => {
        type => 'boolean',
        description => 'Whether this interface should be protected by the firewall.',
        optional => 1,
    },
    link_down => {
        type => 'boolean',
        description => 'Whether this interface should be disconnected (like pulling the plug).',
        optional => 1,
    },
    mtu => {
        type => 'integer',
        minimum => 1, maximum => 65520,
        description => "Force MTU, for VirtIO only. Set to '1' to use the bridge MTU",
        optional => 1,
    },
};

# Description shared by all 'netN' options; also registered as standard option 'pve-qm-net'.
my $netdesc = {
    optional => 1,
    type => 'string', format => $net_fmt,
    description => "Specify network devices.",
};

PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
952
# Property string schema for the cloud-init 'ipconfigN' options, registered as format
# 'pve-qm-ipconfig'. A gateway key is only accepted together with its address key
# ('requires').
my $ipconfig_fmt = {
    ip => {
        type => 'string',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        description => 'IPv4 address in CIDR format.',
        optional => 1,
        default => 'dhcp',
    },
    gw => {
        type => 'string',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        description => 'Default gateway for IPv4 traffic.',
        optional => 1,
        requires => 'ip',
    },
    ip6 => {
        type => 'string',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        description => 'IPv6 address in CIDR format.',
        optional => 1,
        default => 'dhcp',
    },
    gw6 => {
        type => 'string',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        description => 'Default gateway for IPv6 traffic.',
        optional => 1,
        requires => 'ip6',
    },
};
PVE::JSONSchema::register_format('pve-qm-ipconfig', $ipconfig_fmt);

# Description shared by all 'ipconfigN' options.
my $ipconfigdesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-ipconfig',
    description => <<'EODESCR',
cloud-init: Specify IP addresses and gateways for the corresponding interface.

IP addresses use CIDR notation, gateways are optional but need an IP of the same type specified.

The special string 'dhcp' can be used for IP addresses to use DHCP, in which case no explicit
gateway should be provided.
For IPv6 the special string 'auto' can be used to use stateless autoconfiguration. This requires
cloud-init 19.4 or newer.

If cloud-init is enabled and neither an IPv4 nor an IPv6 address is specified, it defaults to using
dhcp on IPv4.
EODESCR
};
# The standard option must carry the ipconfig description, not the network device one.
PVE::JSONSchema::register_standard_option("pve-qm-ipconfig", $ipconfigdesc);
1006
# one netN / ipconfigN schema entry per possible network device
for my $i (0 .. $MAX_NETS - 1) {
    $confdesc->{"net$i"} = $netdesc;
    $confdesc_cloudinit->{"ipconfig$i"} = $ipconfigdesc;
}

# every cloud-init option is also a regular VM config option
$confdesc->{$_} = $confdesc_cloudinit->{$_} for keys %$confdesc_cloudinit;
1015
PVE::JSONSchema::register_format('pve-volume-id-or-qm-path', \&verify_volume_id_or_qm_path);

# Format verifier: accepts 'none', 'cdrom', anything starting with '/', or a
# value that passes the 'pve-volume-id' format check.
# Returns the (checked) value; on failure returns nothing if $noerr is set and
# re-throws the check error otherwise.
sub verify_volume_id_or_qm_path {
    my ($volid, $noerr) = @_;

    # special keywords and absolute paths are passed through untouched
    return $volid if $volid eq 'none' || $volid eq 'cdrom' || $volid =~ m|^/|;

    # everything else has to be a volume-id
    my $checked = eval { PVE::JSONSchema::check_format('pve-volume-id', $volid, '') };
    if (my $err = $@) {
        return if $noerr;
        die $err;
    }
    return $checked;
}
1032
# property-string format of the usbN options
my $usb_fmt = {
    host => {
        default_key => 1,
        type => 'string', format => 'pve-qm-usb-device',
        format_description => 'HOSTUSBDEVICE|spice',
        description => <<EODESCR,
The Host USB device or port or the value 'spice'. HOSTUSBDEVICE syntax is:

 'bus-port(.port)*' (decimal numbers) or
 'vendor_id:product_id' (hexadecimal numbers) or
 'spice'

You can use the 'lsusb -t' command to list existing usb devices.

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

The value 'spice' can be used to add a usb redirection devices for spice.
EODESCR
    },
    usb3 => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether the given host option is a USB3 device or port.",
        default => 0,
    },
};

# schema entry used for every usbN option
my $usbdesc = {
    optional => 1,
    type => 'string', format => $usb_fmt,
    description => "Configure an USB device (n is 0 to 4).",
};
PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc);
1067
# schema entry used for every serialN option: either a host device below /dev
# or the keyword 'socket'
my $serialdesc = {
    type => 'string',
    pattern => '(/dev/.+|socket)',
    optional => 1,
    description => "Create a serial device inside the VM (n is 0 to 3)",
    verbose_description => <<EODESCR,
Create a serial device inside the VM (n is 0 to 3), and pass through a
host serial device (i.e. /dev/ttyS0), or create a unix socket on the
host side (use 'qm terminal' to open a terminal connection).

NOTE: If you pass through a host serial device, it is no longer possible to migrate such machines -
use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1084
# schema entry used for every parallelN option: a host parport or usb lp device
my $paralleldesc = {
    type => 'string',
    pattern => '/dev/parport\d+|/dev/usb/lp\d+',
    optional => 1,
    description => "Map host parallel devices (n is 0 to 2).",
    verbose_description => <<EODESCR,
Map host parallel devices (n is 0 to 2).

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1099
# add the numbered device options (parallelN, serialN, hostpciN, usbN) and all
# drive options to the VM config schema
$confdesc->{"parallel$_"} = $paralleldesc for 0 .. $MAX_PARALLEL_PORTS - 1;

$confdesc->{"serial$_"} = $serialdesc for 0 .. $MAX_SERIAL_PORTS - 1;

$confdesc->{"hostpci$_"} = $PVE::QemuServer::PCI::hostpcidesc
    for 0 .. $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES - 1;

foreach my $drive_key (keys %{$PVE::QemuServer::Drive::drivedesc_hash}) {
    $confdesc->{$drive_key} = $PVE::QemuServer::Drive::drivedesc_hash->{$drive_key};
}

$confdesc->{"usb$_"} = $usbdesc for 0 .. $MAX_USB_DEVICES - 1;
1119
# property-string format of the 'boot' option: either the deprecated legacy
# letter list or an explicit device order
my $boot_fmt = {
    legacy => {
        type => 'string',
        pattern => '[acdn]{1,4}',
        format_description => "[acdn]{1,4}",
        description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n). Deprecated, use 'order=' instead.",
        optional => 1,
        default_key => 1,

        # note: this is also the fallback if boot: is not given at all
        default => 'cdn',
    },
    order => {
        type => 'string',
        format => 'pve-qm-bootdev-list',
        format_description => "device[;device...]",
        optional => 1,
        description => <<EODESC,
The guest will attempt to boot from devices in the order they appear here.

Disks, optical drives and passed-through storage USB devices will be directly
booted from, NICs will load PXE, and PCIe devices will either behave like disks
(e.g. NVMe) or load an option ROM (e.g. RAID controller, hardware NIC).

Note that only devices in this list will be marked as bootable and thus loaded
by the guest firmware (BIOS/UEFI). If you require multiple disks for booting
(e.g. software-raid), you need to specify all of them here.

Overrides the deprecated 'legacy=[acdn]*' value when given.
EODESC
    },
};
PVE::JSONSchema::register_format('pve-qm-boot', $boot_fmt);
1154
PVE::JSONSchema::register_format('pve-qm-bootdev', \&verify_bootdev);

# Format verifier for a single boot device name.
# Valid are drive names (except efidisk*/tpmstate*) and netN/usbN/hostpciN
# keys that exist in the config schema. Returns the name; on failure returns
# nothing if $noerr is set and dies otherwise.
sub verify_bootdev {
    my ($dev, $noerr) = @_;

    my $special = $dev =~ m/^efidisk/ || $dev =~ m/^tpmstate/;
    return $dev if PVE::QemuServer::Drive::is_valid_drivename($dev) && !$special;

    # non-drive devices: the name must be <prefix><number> and known to the schema
    for my $prefix (qw(net usb hostpci)) {
        next if $dev !~ m/^$prefix\d+$/;
        return $dev if $confdesc->{$dev};
    }

    return if $noerr;
    die "invalid boot device '$dev'\n";
}
1176
# Serialize a list of boot devices (array ref) into a 'boot' property string
# using the 'order' key; an empty list gives an empty string.
sub print_bootorder {
    my ($devs) = @_;

    return "" unless scalar(@$devs);

    my $order = join(';', @$devs);
    return PVE::JSONSchema::print_property_string({ order => $order }, $boot_fmt);
}
1183
# cached result of the KVM_GET_API_VERSION ioctl (0 = not queried yet)
my $kvm_api_version = 0;

# Return the KVM API version reported by /dev/kvm, caching a true result.
# Returns nothing if /dev/kvm cannot be opened.
sub kvm_version {
    if (!$kvm_api_version) {
        open(my $kvm_fh, '<', '/dev/kvm') or return;

        # 0xae00 => KVM_GET_API_VERSION
        $kvm_api_version = ioctl($kvm_fh, 0xae00, 0);
        close($kvm_fh);
    }

    return $kvm_api_version;
}
1197
# per-binary caches: parsed version string and the binary's mtime at that point
my $kvm_user_version = {};
my $kvm_mtime = {};

# Return the version printed by "$binary --version" (e.g. '6.1.0'), or
# 'unknown' if no matching line was seen. The result is cached per binary and
# re-read when the binary's mtime changes.
sub kvm_user_version {
    my ($binary) = @_;

    $binary //= get_command_for_arch(get_host_arch()); # get the native arch by default
    my $st = stat($binary);

    my $cached_version = $kvm_user_version->{$binary};
    my $cached_mtime = $kvm_mtime->{$binary} // -1;
    if ($cached_version && $cached_mtime == $st->mtime) {
        return $cached_version;
    }

    $kvm_user_version->{$binary} = 'unknown';
    $kvm_mtime->{$binary} = $st->mtime;

    # the second capture is major.minor[.patch]; a fourth component is ignored
    my $parse_version_line = sub {
        my ($line) = @_;
        if ($line =~ m/^QEMU( PC)? emulator version (\d+\.\d+(\.\d+)?)(\.\d+)?[,\s]/) {
            $kvm_user_version->{$binary} = $2;
        }
    };

    eval { run_command([$binary, '--version'], outfunc => $parse_version_line); };
    warn $@ if $@;

    return $kvm_user_version->{$binary};
}
# wrapper around PVE::QemuServer::Machine::extract_version that falls back to
# the installed binary's version when none is passed
my sub extract_version {
    my ($machine_type, $version) = @_;
    $version //= kvm_user_version();
    return PVE::QemuServer::Machine::extract_version($machine_type, $version);
}
1232
# true if /dev/vhost-net exists as a character device
sub kernel_has_vhost_net {
    my $vhost_dev = '/dev/vhost-net';
    return -c $vhost_dev;
}
1236
# true if $key is a known option in the VM config schema
sub option_exists {
    my ($key) = @_;
    my $schema = $confdesc->{$key};
    return defined($schema);
}
1241
# cached host cdrom device path, set on first successful lookup
my $cdrom_path;

# Return the first of /dev/cdrom, /dev/cdrom1, /dev/cdrom2 that is a symlink
# and remember it for later calls. If none is, the (false) result of the last
# symlink test is returned, exactly as before.
sub get_cdrom_path {
    return $cdrom_path if $cdrom_path;

    my $is_link;
    for my $candidate ("/dev/cdrom", "/dev/cdrom1", "/dev/cdrom2") {
        $is_link = -l $candidate;
        return $cdrom_path = $candidate if $is_link;
    }
    return $is_link;
}
1251
# Resolve a cdrom drive's 'file' value to a path:
#   'cdrom'       -> host cdrom device (see get_cdrom_path)
#   'none'        -> empty string
#   absolute path -> returned as is
#   anything else -> treated as volume ID and resolved via PVE::Storage::path
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';
    return '' if $cdrom eq 'none';
    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path($storecfg, $cdrom);
}
1265
# try to convert old style file names to volume IDs
#
# 'none', 'cdrom', /dev/ paths and values that already look like
# 'storage:volume' are returned unchanged. A bare file name is mapped onto the
# 'local' storage (iso/ for cdrom media, the VM's directory otherwise). Any
# other value containing a '/' cannot be converted and yields nothing.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    my $needs_no_conversion = $file eq 'none'
        || $file eq 'cdrom'
        || $file =~ m|^/dev/.+|
        || $file =~ m/^([^:]+):(.+)$/;
    return $file if $needs_no_conversion;

    return if $file =~ m|/|;

    my $subdir = ($media && $media eq 'cdrom') ? 'iso' : $vmid;
    return "local:$subdir/$file";
}
1284
# Check that the storage content type $vtype fits the drive's media type
# ('disk' expects 'images', 'cdrom' expects 'iso'). Nothing is checked when
# $media is not set. Raises a parameter exception keyed by $opt on mismatch.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my %content_type_for = (
        disk => 'images',
        cdrom => 'iso',
    );
    my $etype = $content_type_for{$media};
    die "internal error" if !defined($etype);

    return if $vtype eq $etype;

    raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
}
1303
# Normalize $drive->{file} in place: a filesystem path that is not below /dev/
# is converted to the volume ID of the storage it belongs to (raising a
# parameter exception for $opt if no storage matches). The media type defaults
# to 'cdrom' for iso volumes and for the 'cdrom'/'none' keywords.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    my $file = $drive->{file};

    # try to convert filesystem paths to volume IDs
    my $is_plain_path = $file !~ m/^(cdrom|none)$/
        && $file !~ m|^/dev/.+|
        && $file !~ m/^([^:]+):(.+)$/
        && $file !~ m/^\d+$/;

    if ($is_plain_path) {
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $file);
        raise_param_exc({ $opt => "unable to associate path '$file' to any storage"})
            if !$vtype;
        $drive->{media} = 'cdrom' if !$drive->{media} && $vtype eq 'iso';
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    $drive->{media} = 'cdrom' if !$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/;
}
1323
# Parse the 'hotplug' option into a hash of enabled features.
# '0' disables everything (empty hash), '1' selects the schema default list.
# Dies on an unknown feature name.
sub parse_hotplug_features {
    my ($data) = @_;

    return {} if $data eq '0';

    $data = $confdesc->{hotplug}->{default} if $data eq '1';

    my $enabled = {};
    for my $feature (PVE::Tools::split_list($data)) {
        die "invalid hotplug feature '$feature'\n"
            if $feature !~ m/^(network|disk|cpu|memory|usb)$/;
        $enabled->{$1} = 1;
    }
    return $enabled;
}
1342
PVE::JSONSchema::register_format('pve-hotplug-features', \&pve_verify_hotplug_features);

# Format verifier for the 'hotplug' option.
# Returns $value if it parses. On failure it returns nothing when $noerr is
# set, and otherwise dies with the parser's error.
sub pve_verify_hotplug_features {
    my ($value, $noerr) = @_;

    # parse_hotplug_features() dies on invalid input, so it has to be wrapped
    # for $noerr to have any effect
    my $features = eval { parse_hotplug_features($value) };
    if (my $err = $@) {
        return if $noerr;
        die $err;
    }

    return $value if $features;

    return if $noerr;

    die "unable to parse hotplug option\n";
}
1353
# Query a device through the SCSI generic (sg) ioctl interface.
#
# $fh    - open file handle of the device
# $noerr - if true, return nothing on failure instead of dying
#
# Returns a hash ref with vendor, product, revision, removable (0/1) and type
# (low 5 bits of the first response byte).
sub scsi_inquiry {
    my($fh, $noerr) = @_;

    my $SG_IO = 0x2285;
    my $SG_GET_VERSION_NUM = 0x2282;

    my $versionbuf = "\x00" x 8;
    my $ret = ioctl($fh, $SG_GET_VERSION_NUM, $versionbuf);
    if (!$ret) {
        die "scsi ioctl SG_GET_VERSION_NUM failed - $!\n" if !$noerr;
        return;
    }
    my $version = unpack("I", $versionbuf);
    if ($version < 30000) {
        die "scsi generic interface too old\n" if !$noerr;
        return;
    }

    my $buf = "\x00" x 36;
    my $sensebuf = "\x00" x 8;
    # 6 byte command: opcode 0x12 with a 36 byte allocation length
    # (presumably SCSI INQUIRY - verify against the SCSI spec)
    my $cmd = pack("C x3 C x1", 0x12, 36);

    # see /usr/include/scsi/sg.h
    my $sg_io_hdr_t = "i i C C s I P P P I I i P C C C C S S i I I";

    my $packet = pack($sg_io_hdr_t, ord('S'), -3, length($cmd),
                      length($sensebuf), 0, length($buf), $buf,
                      $cmd, $sensebuf, 6000);

    $ret = ioctl($fh, $SG_IO, $packet);
    if (!$ret) {
        die "scsi ioctl SG_IO failed - $!\n" if !$noerr;
        return;
    }

    # fields 17 and 18 of the header are checked as error indicators
    # (NOTE(review): these look like host_status/driver_status - confirm in sg.h)
    my @res = unpack($sg_io_hdr_t, $packet);
    if ($res[17] || $res[18]) {
        die "scsi ioctl SG_IO status error - $!\n" if !$noerr;
        return;
    }

    my $res = {};
    (my $byte0, my $byte1, $res->{vendor},
     $res->{product}, $res->{revision}) = unpack("C C x6 A8 A16 A4", $buf);

    $res->{removable} = $byte1 & 128 ? 1 : 0;
    $res->{type} = $byte0 & 31;

    return $res;
}
1404
# Open $path read/write and run scsi_inquiry() on it in no-error mode.
# Returns the inquiry result hash, or nothing if the path cannot be opened or
# the inquiry fails.
sub path_is_scsi {
    my ($path) = @_;

    # pass the mode separately so that the path is never interpreted as part
    # of the open mode string
    my $fh = IO::File->new($path, '+<') || return;
    my $res = scsi_inquiry($fh, 1);
    close($fh);

    return $res;
}
1414
# Build the -device argument for the USB tablet.
# q35 machines and aarch64 guests attach it to the ehci bus, everything else
# to uhci.
sub print_tabletdevice_full {
    my ($conf, $arch) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # we use uhci for old VMs because tablet driver was buggy in older qemu
    my $usbbus = ($q35 || $arch eq 'aarch64') ? 'ehci' : 'uhci';

    return "usb-tablet,id=tablet,bus=$usbbus.0,port=1";
}
1430
# Build the -device argument for a USB keyboard; only produced for aarch64,
# all other architectures get nothing.
sub print_keyboarddevice_full {
    my ($conf, $arch, $machine) = @_;

    if ($arch eq 'aarch64') {
        return "usb-kbd,id=keyboard,bus=ehci.0,port=2";
    }

    return;
}
1438
# drive identifier as used in device/drive ids: interface name followed by index
my sub get_drive_id {
    my ($drive) = @_;
    return $drive->{interface} . $drive->{index};
}
1443
# Build the QEMU -device argument for a parsed drive.
#
# Supports the virtio, scsi, ide and sata interfaces; dies for usb ("implement
# me") and for any other interface. bootindex and serial are appended for all
# supported interfaces when set.
sub print_drivedevice_full {
    my ($storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type) = @_;

    my $device = '';

    my $drive_id = get_drive_id($drive);
    if ($drive->{interface} eq 'virtio') {
        my $pciaddr = print_pci_addr("$drive_id", $bridges, $arch, $machine_type);
        $device = "virtio-blk-pci,drive=drive-$drive_id,id=${drive_id}${pciaddr}";
        $device .= ",iothread=iothread-$drive_id" if $drive->{iothread};
    } elsif ($drive->{interface} eq 'scsi') {

        my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = 'hd';
        my $path = '';
        if (drive_is_cdrom($drive)) {
            $devicetype = 'cd';
        } else {
            if ($drive->{file} =~ m|^/|) {
                # host path: probe the device to pick a pass-through device type
                $path = $drive->{file};
                if (my $info = path_is_scsi($path)) {
                    if ($info->{type} == 0 && $drive->{scsiblock}) {
                        $devicetype = 'block';
                    } elsif ($info->{type} == 1) { # tape
                        $devicetype = 'generic';
                    }
                }
            } else {
                $path = PVE::Storage::path($storecfg, $drive->{file});
            }

            # for compatibility only, we prefer scsi-hd (#2408, #2355, #2380)
            my $version = extract_version($machine_type, kvm_user_version());
            if ($path =~ m/^iscsi\:\/\// &&
                !min_version($version, 4, 1)) {
                $devicetype = 'generic';
            }
        }

        # lsi* and pvscsi controllers (and the default) address disks by
        # scsi-id, all others by lun
        if (!$conf->{scsihw} || $conf->{scsihw} =~ m/^lsi/ || $conf->{scsihw} eq 'pvscsi') {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,scsi-id=$unit";
        } else {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,channel=0,scsi-id=0"
                .",lun=$drive->{index}";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($drive->{ssd} && ($devicetype eq 'block' || $devicetype eq 'hd')) {
            $device .= ",rotation_rate=1";
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};

    } elsif ($drive->{interface} eq 'ide' || $drive->{interface} eq 'sata') {
        my $maxdev = ($drive->{interface} eq 'sata') ? $PVE::QemuServer::Drive::MAX_SATA_DISKS : 2;
        my $controller = int($drive->{index} / $maxdev);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = ($drive->{media} && $drive->{media} eq 'cdrom') ? "cd" : "hd";

        $device = "ide-$devicetype";
        if ($drive->{interface} eq 'ide') {
            $device .= ",bus=ide.$controller,unit=$unit";
        } else {
            $device .= ",bus=ahci$controller.$unit";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($devicetype eq 'hd') {
            if (my $model = $drive->{model}) {
                # the model is stored URI-escaped in the config
                $model = URI::Escape::uri_unescape($model);
                $device .= ",model=$model";
            }
            if ($drive->{ssd}) {
                $device .= ",rotation_rate=1";
            }
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};
    } elsif ($drive->{interface} eq 'usb') {
        die "implement me";
    } else {
        die "unsupported interface type";
    }

    $device .= ",bootindex=$drive->{bootindex}" if $drive->{bootindex};

    if (my $serial = $drive->{serial}) {
        # the serial is stored URI-escaped in the config
        $serial = URI::Escape::uri_unescape($serial);
        $device .= ",serial=$serial";
    }

    return $device;
}
1539
# Read the first InitiatorName=... entry from /etc/iscsi/initiatorname.iscsi.
# Returns nothing if the file cannot be opened, undef if no entry is found.
sub get_initiator_name {
    my $fh = IO::File->new('/etc/iscsi/initiatorname.iscsi') || return;

    my $initiator;
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\s*InitiatorName\s*=\s*([\.\-:\w]+)/) {
            $initiator = $1;
            last;
        }
    }
    $fh->close();

    return $initiator;
}
1553
# Build the QEMU -drive argument for a parsed drive.
#
# $pbs_name - if set, the drive is backed by the PBS block node of that name
#             through an alloc-track layer (not allowed for cdrom drives)
# $io_uring - whether aio=io_uring may be chosen as the default aio mode
#
# Returns the option string "[file...=PATH,]if=none,id=drive-<id>,<options>".
sub print_drive_commandline_full {
    my ($storecfg, $vmid, $drive, $pbs_name, $io_uring) = @_;

    my $path;
    my $volid = $drive->{file};
    my $format = $drive->{format};
    my $drive_id = get_drive_id($drive);

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
    my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;

    if (drive_is_cdrom($drive)) {
        $path = get_iso_path($storecfg, $vmid, $volid);
        die "$drive_id: cannot back cdrom drive with PBS snapshot\n" if $pbs_name;
    } else {
        if ($storeid) {
            $path = PVE::Storage::path($storecfg, $volid);
            $format //= qemu_img_format($scfg, $volname);
        } else {
            $path = $volid;
            $format //= "raw";
        }
    }

    my $is_rbd = $path =~ m/^rbd:/;

    # options that are passed through to QEMU under the same name
    my $opts = '';
    my @qemu_drive_options = qw(heads secs cyls trans media cache rerror werror aio discard);
    foreach my $o (@qemu_drive_options) {
        $opts .= ",$o=$drive->{$o}" if defined($drive->{$o});
    }

    # snapshot only accepts on|off
    if (defined($drive->{snapshot})) {
        my $v = $drive->{snapshot} ? 'on' : 'off';
        $opts .= ",snapshot=$v";
    }

    # throttling limits: config key suffix => QEMU option suffix
    foreach my $type (['', '-total'], [_rd => '-read'], [_wr => '-write']) {
        my ($dir, $qmpname) = @$type;
        if (my $v = $drive->{"mbps$dir"}) {
            $opts .= ",throttling.bps$qmpname=".int($v*1024*1024);
        }
        if (my $v = $drive->{"mbps${dir}_max"}) {
            $opts .= ",throttling.bps$qmpname-max=".int($v*1024*1024);
        }
        if (my $v = $drive->{"bps${dir}_max_length"}) {
            $opts .= ",throttling.bps$qmpname-max-length=$v";
        }
        if (my $v = $drive->{"iops${dir}"}) {
            $opts .= ",throttling.iops$qmpname=$v";
        }
        if (my $v = $drive->{"iops${dir}_max"}) {
            $opts .= ",throttling.iops$qmpname-max=$v";
        }
        if (my $v = $drive->{"iops${dir}_max_length"}) {
            $opts .= ",throttling.iops$qmpname-max-length=$v";
        }
    }

    if ($pbs_name) {
        $format = "rbd" if $is_rbd;
        die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n"
            if !$format;
        $opts .= ",format=alloc-track,file.driver=$format";
    } elsif ($format) {
        $opts .= ",format=$format";
    }

    my $cache_direct = 0;

    if (my $cache = $drive->{cache}) {
        $cache_direct = $cache =~ /^(?:off|none|directsync)$/;
    } elsif (!drive_is_cdrom($drive) && !($scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow})) {
        $opts .= ",cache=none";
        $cache_direct = 1;
    }

    # io_uring with cache mode writeback or writethrough on krbd will hang...
    my $rbd_no_io_uring = $scfg && $scfg->{type} eq 'rbd' && $scfg->{krbd} && !$cache_direct;

    # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
    # sometimes, just plain disable...
    my $lvm_no_io_uring = $scfg && $scfg->{type} eq 'lvm';

    # pick a default aio mode only if the drive does not configure one
    if (!$drive->{aio}) {
        if ($io_uring && !$rbd_no_io_uring && !$lvm_no_io_uring) {
            # io_uring supports all cache modes
            $opts .= ",aio=io_uring";
        } else {
            # aio native works only with O_DIRECT
            if ($cache_direct) {
                $opts .= ",aio=native";
            } else {
                $opts .= ",aio=threads";
            }
        }
    }

    if (!drive_is_cdrom($drive)) {
        my $detectzeroes;
        if (defined($drive->{detect_zeroes}) && !$drive->{detect_zeroes}) {
            $detectzeroes = 'off';
        } elsif ($drive->{discard}) {
            $detectzeroes = $drive->{discard} eq 'on' ? 'unmap' : 'on';
        } else {
            # This used to be our default with discard not being specified:
            $detectzeroes = 'on';
        }

        # note: 'detect-zeroes' works per blockdev and we want it to persist
        # after the alloc-track is removed, so put it on 'file' directly
        my $dz_param = $pbs_name ? "file.detect-zeroes" : "detect-zeroes";
        $opts .= ",$dz_param=$detectzeroes" if $detectzeroes;
    }

    if ($pbs_name) {
        $opts .= ",backing=$pbs_name";
        $opts .= ",auto-remove=on";
    }

    my $file_param = "file";
    if ($pbs_name) {
        # non-rbd drivers require the underlying file to be a seperate block
        # node, so add a second .file indirection
        $file_param .= ".file" if !$is_rbd;
        $file_param .= ".filename";
    }
    my $pathinfo = $path ? "$file_param=$path," : '';

    return "${pathinfo}if=none,id=drive-${drive_id}${opts}";
}
1687
# Build the -blockdev argument for a read-only PBS backed block node named
# $pbs_name from the repository/snapshot/archive (and optional keyfile) in
# $pbs_conf.
sub print_pbs_blockdev {
    my ($pbs_conf, $pbs_name) = @_;

    my @parts = (
        "driver=pbs",
        "node-name=$pbs_name",
        "read-only=on",
        "repository=$pbs_conf->{repository}",
        "snapshot=$pbs_conf->{snapshot}",
        "archive=$pbs_conf->{archive}",
    );
    push @parts, "keyfile=$pbs_conf->{keyfile}" if $pbs_conf->{keyfile};

    return join(',', @parts);
}
1697
# Build the QEMU -device argument for a parsed network device.
# The 'virtio' model maps to virtio-net-pci; multiqueue, bootindex, host MTU
# (VirtIO on a bridge only) and legacy PXE rom files are added as needed.
sub print_netdevice_full {
    my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type) = @_;

    my $is_virtio = $net->{model} eq 'virtio';
    my $device = $is_virtio ? 'virtio-net-pci' : $net->{model};

    my $pciaddr = print_pci_addr("$netid", $bridges, $arch, $machine_type);
    my $tmpstr = "$device,mac=$net->{macaddr},netdev=$netid$pciaddr,id=$netid";

    my $queues = $net->{queues};
    if ($queues && $queues > 1 && $is_virtio) {
        # Consider we have N queues, the number of vectors needed is 2 * N + 2, i.e., one per in
        # and out of each queue plus one config interrupt and control vector queue
        my $vectors = $queues * 2 + 2;
        $tmpstr .= ",vectors=$vectors,mq=on";
    }
    $tmpstr .= ",bootindex=$net->{bootindex}" if $net->{bootindex};

    if (my $mtu = $net->{mtu}) {
        if ($is_virtio && $net->{bridge}) {
            my $bridge_mtu = PVE::Network::read_bridge_mtu($net->{bridge});
            if ($mtu == 1) {
                # the special value 1 means "use the bridge MTU"
                $mtu = $bridge_mtu;
            } elsif ($mtu < 576) {
                die "netdev $netid: MTU '$mtu' is smaller than the IP minimum MTU '576'\n";
            } elsif ($mtu > $bridge_mtu) {
                die "netdev $netid: MTU '$mtu' is bigger than the bridge MTU '$bridge_mtu'\n";
            }
            $tmpstr .= ",host_mtu=$mtu";
        } else {
            warn "WARN: netdev $netid: ignoring MTU '$mtu', not using VirtIO or no bridge configured.\n";
        }
    }

    if ($use_old_bios_files) {
        # QEMU device name => PXE rom file
        my %romfile_for = (
            'virtio-net-pci' => 'pxe-virtio.rom',
            'e1000' => 'pxe-e1000.rom',
            'e1000e' => 'pxe-e1000e.rom',
            'ne2k' => 'pxe-ne2k_pci.rom',
            'pcnet' => 'pxe-pcnet.rom',
            'rtl8139' => 'pxe-rtl8139.rom',
        );
        my $romfile = $romfile_for{$device};
        $tmpstr .= ",romfile=$romfile" if $romfile;
    }

    return $tmpstr;
}
1752
# Build the QEMU -netdev argument for a parsed network device.
# With a bridge a tap device "tap<vmid>i<index>" is used together with the
# pve-bridge scripts, otherwise user mode networking.
# Dies if $netid is not of the form netN with N below $MAX_NETS, or if the
# resulting interface name is too long.
sub print_netdev_full {
    my ($vmid, $conf, $arch, $net, $netid, $hotplug) = @_;

    # a non-matching id must be rejected here: an empty index would pass the
    # numeric range check below and produce a bogus interface name
    my ($index) = $netid =~ m/^net(\d+)$/
        or die "got strange net id '$netid'\n";
    my $i = int($index);

    die "got strange net id '$i'\n" if $i >= ${MAX_NETS};

    my $ifname = "tap${vmid}i$i";

    # kvm uses TUNSETIFF ioctl, and that limits ifname length
    die "interface name '$ifname' is too long (max 15 character)\n"
        if length($ifname) >= 16;

    my $vhostparam = '';
    if (is_native($arch)) {
        $vhostparam = ',vhost=on' if kernel_has_vhost_net() && $net->{model} eq 'virtio';
    }

    my $vmname = $conf->{name} || "vm$vmid";

    my $netdev = "";
    my $script = $hotplug ? "pve-bridge-hotplug" : "pve-bridge";

    if ($net->{bridge}) {
        $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/$script"
            .",downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam";
    } else {
        $netdev = "type=user,id=$netid,hostname=$vmname";
    }

    $netdev .= ",queues=$net->{queues}" if ($net->{queues} && $net->{model} eq 'virtio');

    return $netdev;
}
1790
# configured vga type => QEMU display device name
my $vga_map = {
    cirrus => 'cirrus-vga',
    std => 'VGA',
    vmware => 'vmware-svga',
    virtio => 'virtio-vga',
};
1797
# Build the QEMU -device argument for a display adapter.
#
# $vga    - parsed vga option (type, memory)
# $id     - display index for additional displays (undef for the first one)
# $qxlnum - if true, a qxl device is used regardless of the mapped type
#
# Dies if no device type can be determined for $vga->{type}.
sub print_vga_device {
    my ($conf, $vga, $arch, $machine_version, $machine, $id, $qxlnum, $bridges) = @_;

    my $type = $vga_map->{$vga->{type}};
    if ($arch eq 'aarch64' && defined($type) && $type eq 'virtio-vga') {
        $type = 'virtio-gpu';
    }
    my $vgamem_mb = $vga->{memory};

    my $max_outputs = '';
    if ($qxlnum) {
        $type = $id ? 'qxl' : 'qxl-vga';

        # the alternatives are grouped so that both anchors apply to each of them
        if (!$conf->{ostype} || $conf->{ostype} =~ m/^(?:l\d\d|other)$/) {
            # set max outputs so linux can have up to 4 qxl displays with one device
            if (min_version($machine_version, 4, 1)) {
                $max_outputs = ",max_outputs=4";
            }
        }
    }

    die "no devicetype for $vga->{type}\n" if !$type;

    my $memory = "";
    if ($vgamem_mb) {
        if ($vga->{type} eq 'virtio') {
            my $bytes = PVE::Tools::convert_size($vgamem_mb, "mb" => "b");
            $memory = ",max_hostmem=$bytes";
        } elsif ($qxlnum) {
            # from https://www.spice-space.org/multiple-monitors.html
            $memory = ",vgamem_mb=$vgamem_mb";
            my $ram = $vgamem_mb * 4;
            my $vram = $vgamem_mb * 2;
            $memory .= ",ram_size_mb=$ram,vram_size_mb=$vram";
        } else {
            $memory = ",vgamem_mb=$vgamem_mb";
        }
    } elsif ($qxlnum && $id) {
        $memory = ",ram_size=67108864,vram_size=33554432";
    }

    # edid is switched off for std VGA on Windows guests unless OVMF is used
    my $edidoff = "";
    if ($type eq 'VGA' && windows_version($conf->{ostype})) {
        $edidoff=",edid=off" if (!defined($conf->{bios}) || $conf->{bios} ne 'ovmf');
    }

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $vgaid = "vga" . ($id // '');
    my $pciaddr;

    if ($q35 && $vgaid eq 'vga') {
        # the first display uses pcie.0 bus on q35 machines
        $pciaddr = print_pcie_addr($vgaid, $bridges, $arch, $machine);
    } else {
        $pciaddr = print_pci_addr($vgaid, $bridges, $arch, $machine);
    }

    return "$type,id=${vgaid}${memory}${max_outputs}${pciaddr}${edidoff}";
}
1857
# Parse a ';' separated list of numbers and ranges ("0-3;5") into an array ref
# of [start, end] pairs; end is undef for a single number.
# Dies on a malformed part or if a range's end is smaller than its start.
sub parse_number_sets {
    my ($set) = @_;

    my @ranges;
    for my $part (split(/;/, $set)) {
        my ($first, $last) = $part =~ /^\s*(\d+)(?:-(\d+))?\s*$/
            or die "invalid range: $part\n";
        die "invalid range: $part ($last < $first)\n" if defined($last) && $last < $first;
        push @ranges, [ $first, $last ];
    }
    return \@ranges;
}
1871
# Parse a numaN property string; the 'cpus' and 'hostnodes' values are further
# expanded with parse_number_sets() when present.
sub parse_numa {
    my ($data) = @_;

    my $res = parse_property_string($numa_fmt, $data);
    for my $key (qw(cpus hostnodes)) {
        next if !defined($res->{$key});
        $res->{$key} = parse_number_sets($res->{$key});
    }
    return $res;
}
1880
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
#
# Parse a netN property string. A parse error is only warned about and yields
# nothing. If no MAC address is given, a random one is generated using the
# datacenter-wide mac_prefix.
sub parse_net {
    my ($data) = @_;

    my $res = eval { parse_property_string($net_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    $res->{macaddr} //= do {
        my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
        PVE::Tools::random_ether_addr($dc->{mac_prefix});
    };

    return $res;
}
1896
# ipconfigX ip=cidr,gw=ip,ip6=cidr,gw6=ip
#
# Parse an ipconfigN property string. Parse errors and inconsistent
# address/gateway combinations are only warned about and yield nothing.
# If neither an IPv4 nor an IPv6 address is set, DHCP is returned for both.
sub parse_ipconfig {
    my ($data) = @_;

    my $res = eval { parse_property_string($ipconfig_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    my ($ip, $gw, $ip6, $gw6) = @$res{qw(ip gw ip6 gw6)};

    if ($gw && !$ip) {
        warn 'gateway specified without specifying an IP address';
        return;
    }
    if ($gw6 && !$ip6) {
        warn 'IPv6 gateway specified without specifying an IPv6 address';
        return;
    }
    if ($gw && $ip eq 'dhcp') {
        warn 'gateway specified together with DHCP';
        return;
    }
    if ($gw6 && $ip6 !~ /^$IPV6RE/) {
        # gw6 + auto/dhcp
        warn "IPv6 gateway specified together with $ip6 address";
        return;
    }

    return { ip => 'dhcp', ip6 => 'dhcp' } if !$ip && !$ip6;

    return $res;
}
1931
# serialize a parsed network device back into a netN property string
sub print_net {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $net_fmt);
}
1937
# Re-serialize every netN entry of $settings after parsing it, so that entries
# without a MAC address get the one generated by parse_net(). Entries that
# fail to parse are left untouched.
sub add_random_macs {
    my ($settings) = @_;

    for my $opt (grep { m/^net(\d+)$/ } keys %$settings) {
        my $net = parse_net($settings->{$opt});
        next if !$net;
        $settings->{$opt} = print_net($net);
    }
}
1948
# Returns 1 if the storage layer reports $vmid as owner of $volid, nothing
# otherwise. Values starting with '/' are never owned, and lookup errors are
# treated as "not owned".
sub vm_is_volid_owner {
    my ($storecfg, $vmid, $volid) = @_;

    return if $volid =~ m|^/|;

    my ($path, $owner);
    eval { ($path, $owner) = PVE::Storage::path($storecfg, $volid); };

    return 1 if $owner && ($owner == $vmid);
    return;
}
1962
# Handle a drive that is no longer referenced by the config:
# - a cloud-init drive's volume is freed right away (failures only warn)
# - any other non-cdrom volume owned by this VM is added as unused volume
sub vmconfig_register_unused_drive {
    my ($storecfg, $vmid, $conf, $drive) = @_;

    if (drive_is_cloudinit($drive)) {
        eval { PVE::Storage::vdisk_free($storecfg, $drive->{file}) };
        warn $@ if $@;
    } elsif (!drive_is_cdrom($drive)) {
        my $volid = $drive->{file};
        PVE::QemuConfig->add_unused_volume($conf, $volid, $vmid)
            if vm_is_volid_owner($storecfg, $vmid, $volid);
    }
}
1976
# smbios: [manufacturer=str][,product=str][,version=str][,serial=str][,uuid=uuid][,sku=str][,family=str][,base64=bool]
my $smbios1_fmt = do {
    my $fmt = {
        uuid => {
            type => 'string',
            pattern => '[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}',
            format_description => 'UUID',
            description => "Set SMBIOS1 UUID.",
            optional => 1,
        },
        base64 => {
            type => 'boolean',
            description => 'Flag to indicate that the SMBIOS values are base64 encoded',
            optional => 1,
        },
    };

    # all text fields share the same schema and only differ in their description
    my %text_field_description = (
        version => "Set SMBIOS1 version.",
        serial => "Set SMBIOS1 serial number.",
        manufacturer => "Set SMBIOS1 manufacturer.",
        product => "Set SMBIOS1 product ID.",
        sku => "Set SMBIOS1 SKU string.",
        family => "Set SMBIOS1 family string.",
    );
    for my $field (keys %text_field_description) {
        $fmt->{$field} = {
            type => 'string',
            pattern => '[A-Za-z0-9+\/]+={0,2}',
            format_description => 'Base64 encoded string',
            description => $text_field_description{$field},
            optional => 1,
        };
    }

    $fmt;
};
2034
# Parse an smbios1 property string; a parse error is only warned about and
# results in undef.
sub parse_smbios1 {
    my ($data) = @_;

    my $res = eval { parse_property_string($smbios1_fmt, $data) };
    if (my $err = $@) {
        warn $err;
    }
    return $res;
}
2042
# Serialize an smbios1 hash back into its property-string form.
sub print_smbios1 {
    my $smbios1 = shift;

    return PVE::JSONSchema::print_property_string($smbios1, $smbios1_fmt);
}

# make the smbios1 schema available under a named format
PVE::JSONSchema::register_format('pve-qm-smbios1', $smbios1_fmt);
2049
# Parse a 'watchdog' property string.
# Returns nothing for an empty value; on a parse error a warning is emitted
# and undef is returned.
sub parse_watchdog {
    my ($value) = @_;

    return if !$value;

    my $parsed = eval { parse_property_string($watchdog_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $parsed;
}
2059
# Parse the 'agent' option of a VM config.
# Always returns a hash reference; it is empty when the option is not set,
# cannot be parsed (a warning is emitted) or the agent is not enabled.
sub parse_guest_agent {
    my ($conf) = @_;

    my $agent_prop = $conf->{agent};
    return {} if !defined($agent_prop);

    my $agent = eval { parse_property_string($agent_fmt, $agent_prop) };
    warn $@ if $@;

    # a disabled agent hides all other properties that may be set
    return $agent->{enabled} ? $agent : {};
}
2072
# Look up a single property of the (enabled) guest agent configuration.
# Returns undef when no agent option is configured at all.
sub get_qga_key {
    my ($conf, $key) = @_;

    if (!defined($conf->{agent})) {
        return undef;
    }

    my $agent_conf = parse_guest_agent($conf);

    return $agent_conf->{$key};
}
2080
# Parse a 'vga' property string.
# An empty value yields an empty hash; a parse error yields a warning and undef.
sub parse_vga {
    my ($value) = @_;

    if (!$value) {
        return {};
    }

    my $parsed = eval { parse_property_string($vga_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $parsed;
}
2089
# Parse an 'rng' property string.
# Returns nothing for an empty value; on a parse error a warning is emitted
# and undef is returned.
sub parse_rng {
    my ($value) = @_;

    return if !$value;

    my $parsed = eval { parse_property_string($rng_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $parsed;
}
2099
PVE::JSONSchema::register_format('pve-qm-usb-device', \&verify_usb_device);

# Format verifier for 'pve-qm-usb-device'.
# Returns the value if parse_usb_device() accepts it. Otherwise it dies,
# unless $noerr is set, in which case nothing is returned.
sub verify_usb_device {
    my ($value, $noerr) = @_;

    if (parse_usb_device($value)) {
        return $value;
    }

    die "unable to parse usb device\n" if !$noerr;

    return;
}
2110
# Add the JSON properties for the create and set functions to $prop.
# Every entry of $confdesc is copied over, except for a few options that are
# deliberately left out. Returns $prop.
sub json_config_properties {
    my ($prop) = @_;

    my %excluded = map { $_ => 1 } qw(parent snaptime vmstate runningmachine runningcpu);

    for my $opt (grep { !$excluded{$_} } keys %$confdesc) {
        $prop->{$opt} = $confdesc->{$opt};
    }

    return $prop;
}
2123
# Hand out a deep copy of $confdesc_cloudinit (used to generate documentation),
# so that callers cannot modify the original description.
sub cloudinit_config_properties {
    my $copy = dclone($confdesc_cloudinit);

    return $copy;
}
2129
# Validate $value against the type that $confdesc declares for option $key.
#
# Returns the normalized value: 0/1 for booleans, an integer for integers,
# the unchanged value for numbers and formatted strings, and the value with
# one pair of surrounding double quotes removed for plain strings.
# Dies on unknown keys, undefined values, embedded line breaks and type
# mismatches.
sub check_type {
    my ($key, $value) = @_;

    my $schema = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$schema;

    my $type = $schema->{type};

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        if ($value eq '1' || $value =~ m/^(on|yes|true)$/i) {
            return 1;
        }
        if ($value eq '0' || $value =~ m/^(off|no|false)$/i) {
            return 0;
        }
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        if ($value =~ m/^(\d+)$/) {
            return int($1);
        }
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        if ($value =~ m/^(\d+)(\.\d+)?$/) {
            return $value;
        }
        die "type check ('number') failed - got '$value'\n";
    }

    if ($type eq 'string') {
        my $fmt = $schema->{format};
        if ($fmt) {
            # check_format dies on mismatch
            PVE::JSONSchema::check_format($fmt, $value);
            return $value;
        }
        $value =~ s/^\"(.*)\"$/$1/;
        return $value;
    }

    die "internal error"
}
2166
# Destroy a VM: free the volumes it owns and remove (or replace) its config.
#
# $storecfg            - storage configuration
# $vmid                - ID of the VM to destroy
# $skiplock            - if true, the config lock is not checked
# $replacement_conf    - if defined, written as new config instead of
#                        destroying the config
# $purge_unreferenced  - if true, also free the volumes that vdisk_list
#                        reports for $vmid
#
# Dies if the VM is a template whose base volumes are still in use. Failures
# to free an individual volume only produce a warning.
sub destroy_vm {
    my ($storecfg, $vmid, $skiplock, $replacement_conf, $purge_unreferenced) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    if (!$skiplock) {
        PVE::QemuConfig->check_lock($conf);
    }

    if ($conf->{template}) {
        # refuse to continue while a linked clone still uses a base image
        my $assert_base_unused = sub {
            my ($ds, $drive) = @_;
            return if drive_is_cdrom($drive);

            my $volid = $drive->{file};
            return if !$volid || $volid =~ m|^/|;

            die "base volume '$volid' is still in use by linked cloned\n"
                if PVE::Storage::volume_is_base_and_used($storecfg, $volid);
        };
        PVE::QemuConfig->foreach_volume_full($conf, { include_unused => 1 }, $assert_base_unused);
    }

    # volumes handled so far, to avoid freeing the same volume twice
    my $volids = {};

    my $remove_owned_drive = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/| || $volids->{$volid};

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !($path && $owner && $owner == $vmid);

        $volids->{$volid} = 1;
        eval { PVE::Storage::vdisk_free($storecfg, $volid) };
        warn "Could not remove disk '$volid', check manually: $@" if $@;
    };

    # only remove disks owned by this VM (referenced in the config)
    my $include_opts = {
        include_unused => 1,
        extra_keys => ['vmstate'],
    };
    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $remove_owned_drive);

    # state volumes of snapshots
    for my $snap (values %{$conf->{snapshots}}) {
        my $vmstate = $snap->{vmstate};
        next if !defined($vmstate);

        my $drive = PVE::QemuConfig->parse_volume('vmstate', $vmstate, 1);
        next if !defined($drive);

        $remove_owned_drive->('vmstate', $drive);
    }

    # volumes referenced by pending changes
    PVE::QemuConfig->foreach_volume_full($conf->{pending}, $include_opts, $remove_owned_drive);

    if ($purge_unreferenced) { # also remove unreferenced disk
        my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid, undef, 'images');
        my $free_volume = sub {
            my ($volid, $sid, $volname, $d) = @_;
            eval { PVE::Storage::vdisk_free($storecfg, $volid) };
            warn $@ if $@;
        };
        PVE::Storage::foreach_volid($vmdisks, $free_volume);
    }

    if (defined($replacement_conf)) {
        PVE::QemuConfig->write_config($vmid, $replacement_conf);
    } else {
        PVE::QemuConfig->destroy_config($vmid);
    }
}
2237
# Parse the raw content of a VM configuration file.
#
# $filename - path of the config file; must end in /qemu-server/<vmid>.conf
# $raw      - file content; nothing is returned if it is undefined
#
# Returns a hash reference holding the options of the main section plus
#   digest    - SHA1 hex digest of $raw
#   pending   - options of the [PENDING] section
#   snapshots - one hash of options per snapshot section, keyed by name
#
# Comment lines ('#...') and 'description:' lines are collected into the
# 'description' of the section they appear in. Lines that cannot be parsed
# or validated are reported with a warning and skipped.
sub parse_vm_config {
    my ($filename, $raw) = @_;

    return if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
        pending => {},
    };

    $filename =~ m|/qemu-server/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    my $conf = $res; # hash of the section currently being filled
    my $descr;
    my $section = '';

    # store the description collected so far in the current section
    my $finish_description = sub {
        if (defined($descr)) {
            $descr =~ s/\s+$//;
            $conf->{description} = $descr;
        }
        $descr = undef;
    };

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        if ($line =~ m/^\[PENDING\]\s*$/i) {
            $section = 'pending';
            $finish_description->();
            $conf = $res->{$section} = {};
            next;

        } elsif ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            $section = $1;
            $finish_description->();
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)\s*$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # anchored at the line start: this test runs before the 'args'
            # one, so an unanchored match would swallow any option line whose
            # value merely ends in "snapstate: <state>"
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            # 'args' is stored verbatim, without type check
            my $key = $1;
            my $value = $2;
            $conf->{$key} = $value;
        } elsif ($line =~ m/^delete:\s*(.*\S)\s*$/) {
            my $value = $1;
            if ($section eq 'pending') {
                $conf->{delete} = $value; # we parse this later
            } else {
                warn "vm $vmid - propertry 'delete' is only allowed in [PENDING]\n";
            }
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(.+?)\s*$/) {
            my $key = $1;
            my $value = $2;
            eval { $value = check_type($key, $value); };
            if ($@) {
                warn "vm $vmid - unable to parse value of '$key' - $@";
            } else {
                # 'cdrom' is stored under the 'ide2' key
                $key = 'ide2' if $key eq 'cdrom';
                my $fmt = $confdesc->{$key}->{format};
                if ($fmt && $fmt =~ /^pve-qm-(?:ide|scsi|virtio|sata)$/) {
                    # normalize the file of a drive to a volume ID
                    my $v = parse_drive($key, $value);
                    if (my $volid = filename_to_volume_id($vmid, $v->{file}, $v->{media})) {
                        $v->{file} = $volid;
                        $value = print_drive($v);
                    } else {
                        warn "vm $vmid - unable to parse value of '$key'\n";
                        next;
                    }
                }

                $conf->{$key} = $value;
            }
        } else {
            warn "vm $vmid - unable to parse config: $line\n";
        }
    }

    # description of the last section
    $finish_description->();

    delete $res->{snapstate}; # just to be sure

    return $res;
}
2340
# Serialize a VM configuration hash into the raw config file format.
#
# $filename - not used by the serialization itself
# $conf     - configuration hash; it is normalized in place (legacy 'cdrom'
#             and 'smp' options are converted, values are replaced by their
#             type-checked form, re-added volumes lose their 'unusedX' entry)
#
# Returns the raw text: main section first, then [PENDING] if it has any
# entry, then one section per snapshot in sorted order.
# Dies if a value fails its type check or a 'delete' property shows up
# outside of the pending section.
sub write_vm_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    if (my $cdrom = $conf->{cdrom}) {
        die "option ide2 conflicts with cdrom\n" if $conf->{ide2};
        $conf->{ide2} = $cdrom;
        delete $conf->{cdrom};
    }

    # we do not use 'smp' any longer
    if ($conf->{sockets}) {
        delete $conf->{smp};
    } elsif ($conf->{smp}) {
        $conf->{sockets} = delete $conf->{smp};
        delete $conf->{cores};
    }

    # volumes referenced by a drive of the main or pending section
    my $used_volids = {};

    my %unchecked_key = map { $_ => 1 } qw(digest description snapshots snapstate pending);

    # type-check all options of one section and record its used volumes
    my $sanitize_section = sub {
        my ($cref, $pending, $snapname) = @_;

        for my $key (keys %$cref) {
            next if $unchecked_key{$key};

            if ($key eq 'delete') {
                die "propertry 'delete' is only allowed in [PENDING]\n"
                    if !$pending;
                # fixme: check syntax?
                next;
            }

            my $checked = eval { check_type($key, $cref->{$key}) };
            die "unable to parse value of '$key' - $@" if $@;

            $cref->{$key} = $checked;

            if (!$snapname && is_valid_drivename($key)) {
                my $drive = parse_drive($key, $checked);
                $used_volids->{$drive->{file}} = 1 if $drive && $drive->{file};
            }
        }
    };

    $sanitize_section->($conf);

    $sanitize_section->($conf->{pending}, 1);

    for my $snapname (keys %{$conf->{snapshots}}) {
        die "internal error: snapshot name '$snapname' is forbidden" if lc($snapname) eq 'pending';
        $sanitize_section->($conf->{snapshots}->{$snapname}, undef, $snapname);
    }

    # remove 'unusedX' settings if we re-add a volume
    for my $key (grep { m/^unused/ } keys %$conf) {
        delete $conf->{$key} if $used_volids->{$conf->{$key}};
    }

    # render one section as text
    my $render_section = sub {
        my ($section, $pending) = @_;

        my $text = '';

        # add description as comment to top of file
        my $descr = $section->{description};
        if (defined($descr)) {
            if ($descr) {
                for my $cl (split(/\n/, $descr)) {
                    $text .= '#' . PVE::Tools::encode_text($cl) . "\n";
                }
            } elsif ($pending) {
                $text .= "#\n";
            }
        }

        for my $key (sort keys %$section) {
            next if $key =~ /^(digest|description|pending|snapshots)$/;
            $text .= "$key: $section->{$key}\n";
        }

        return $text;
    };

    my $raw = $render_section->($conf);

    if (scalar(keys %{$conf->{pending}})) {
        $raw .= "\n[PENDING]\n";
        $raw .= $render_section->($conf->{pending}, 1);
    }

    for my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n";
        $raw .= $render_section->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
2442
# Collect the static default values declared in $confdesc (our JSON schema
# configuration). Returns a hash reference mapping option name to default;
# options without a defined default are left out.
sub load_defaults {
    my %defaults;

    for my $key (keys %$confdesc) {
        my $default = $confdesc->{$key}->{default};
        next if !defined($default);
        $defaults{$key} = $default;
    }

    return \%defaults;
}
2456
# List the qemu guests of the cluster-wide VM list that belong to this node.
# Returns a hash reference { $vmid => { exists => 1 }, ... }.
sub config_list {
    my $vmlist = PVE::Cluster::get_vmlist();
    my $res = {};
    return $res if !$vmlist || !$vmlist->{ids};

    my $ids = $vmlist->{ids};
    my $nodename = nodename();

    for my $vmid (keys %$ids) {
        my $entry = $ids->{$vmid};

        my $is_local = $entry->{node} && $entry->{node} eq $nodename;
        my $is_qemu = $entry->{type} && $entry->{type} eq 'qemu';
        next if !($is_local && $is_qemu);

        $res->{$vmid}->{exists} = 1;
    }

    return $res;
}
2472
# Test whether a VM uses local resources (which prevents migration).
#
# Returns an array reference with the names of all config keys that refer to
# local resources. Dies if there is any such key, unless $noerr is set.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    # 'hostusb' and 'hostpci' are the old syntax
    my @loc_res = grep { $conf->{$_} } qw(hostusb hostpci ivshmem);

    for my $k (keys %$conf) {
        my $value = $conf->{$k};

        next if $k =~ m/^usb/ && ($value =~ m/^spice(?![^,])/);
        # sockets are safe: they will be recreated on the target side post-migrate
        next if $k =~ m/^serial/ && ($value eq 'socket');

        if ($k =~ m/^(usb|hostpci|serial|parallel)\d+$/) {
            push @loc_res, $k;
        }
    }

    if (@loc_res && !$noerr) {
        die "VM uses local resources\n";
    }

    return \@loc_res;
}
2495
# Check that the storages used by the volumes of $conf are enabled both
# locally and on $node (used by migrate). Dies via storage_check_enabled,
# or if a storage does not provide the content type of a volume.
sub check_storage_availability {
    my ($storecfg, $conf, $node) = @_;

    my $check_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($sid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        # check if storage is available on both nodes
        my $scfg = PVE::Storage::storage_check_enabled($storecfg, $sid);
        PVE::Storage::storage_check_enabled($storecfg, $sid, $node);

        my ($vtype) = PVE::Storage::parse_volname($storecfg, $volid);

        if (!$scfg->{content}->{$vtype}) {
            die "$volid: content type '$vtype' is not available on storage '$sid'\n";
        }
    };

    PVE::QemuConfig->foreach_volume($conf, $check_volume);
}
2519
# List the nodes where all VM images are available (used by has_feature API).
# Returns a hash reference { $node => 1, ... }.
sub shared_nodes {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my %candidates = map { $_ => 1 } @$nodelist;
    my $nodename = nodename();

    # drop every node that cannot access the storage of the given volume
    my $restrict_to_storage = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        if ($scfg->{disable}) {
            %candidates = ();
        } elsif (my $avail = $scfg->{nodes}) {
            delete @candidates{ grep { !$avail->{$_} } keys %candidates };
        } elsif (!$scfg->{shared}) {
            # a storage that is not shared is only usable on this node
            delete @candidates{ grep { $_ ne $nodename } keys %candidates };
        }
        return;
    };

    PVE::QemuConfig->foreach_volume($conf, $restrict_to_storage);

    return \%candidates;
}
2553
# Determine, for every cluster node, which storages used by the volumes of
# $conf are not available there (storage disabled, or node not listed in the
# 'nodes' setting of the storage).
#
# Returns a hash reference keyed by node name. The entry of a node with
# missing storages carries a sorted list in 'unavailable_storages', all
# other entries are empty hashes.
sub check_local_storage_availability {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => {} } @$nodelist };

    my $record_unavailable = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        my @affected;
        if ($scfg->{disable}) {
            @affected = keys %$nodehash;
        } elsif (my $avail = $scfg->{nodes}) {
            @affected = grep { !$avail->{$_} } keys %$nodehash;
        }

        for my $node (@affected) {
            $nodehash->{$node}->{unavailable_storages}->{$storeid} = 1;
        }
        return;
    };

    PVE::QemuConfig->foreach_volume($conf, $record_unavailable);

    # turn the collected storage ID sets into sorted lists
    for my $info (values %$nodehash) {
        my $unavail = $info->{unavailable_storages};
        next if !$unavail;
        $info->{unavailable_storages} = [ sort keys %$unavail ];
    }

    return $nodehash;
}
2592
# Compat only, use assert_config_exists_on_node and vm_running_locally where possible
#
# Unless $nocheck is set, first asserts that the config of $vmid exists on
# $node. Returns whatever vm_running_locally reports for $vmid.
sub check_running {
    my ($vmid, $nocheck, $node) = @_;

    if (!$nocheck) {
        PVE::QemuConfig::assert_config_exists_on_node($vmid, $node);
    }

    return PVE::QemuServer::Helpers::vm_running_locally($vmid);
}
2600
# Return the result of config_list(), with a 'pid' entry added for every VM
# that has a '<vmid>.pid' file in the run directory and is reported as
# running by check_running().
sub vzlist {
    my $vzlist = config_list();

    my $dir = IO::Dir->new($PVE::QemuServer::Helpers::var_run_tmpdir);
    return $vzlist if !$dir;

    while (defined(my $entry = $dir->read)) {
        if ($entry =~ m/^(\d+)\.pid$/) {
            my $vmid = $1;
            next if !defined($vzlist->{$vmid});

            my $pid = check_running($vmid);
            $vzlist->{$vmid}->{pid} = $pid if $pid;
        }
    }

    return $vzlist;
}
2618
# Property schema describing the fields of a VM status entry. The field names
# match keys that vmstatus() fills in for each VM. Declared with 'our', so it
# is accessible from other packages.
# NOTE(review): presumably referenced as return schema by API methods - confirm.
our $vmstatus_return_properties = {
    vmid => get_standard_option('pve-vmid'),
    status => {
        description => "Qemu process status.",
        type => 'string',
        enum => ['stopped', 'running'],
    },
    maxmem => {
        description => "Maximum memory in bytes.",
        type => 'integer',
        optional => 1,
        renderer => 'bytes',
    },
    maxdisk => {
        description => "Root disk size in bytes.",
        type => 'integer',
        optional => 1,
        renderer => 'bytes',
    },
    name => {
        description => "VM name.",
        type => 'string',
        optional => 1,
    },
    qmpstatus => {
        description => "Qemu QMP agent status.",
        type => 'string',
        optional => 1,
    },
    pid => {
        description => "PID of running qemu process.",
        type => 'integer',
        optional => 1,
    },
    uptime => {
        description => "Uptime.",
        type => 'integer',
        optional => 1,
        renderer => 'duration',
    },
    cpus => {
        description => "Maximum usable CPUs.",
        type => 'number',
        optional => 1,
    },
    lock => {
        description => "The current config lock, if any.",
        type => 'string',
        optional => 1,
    },
    tags => {
        description => "The current configured tags, if any",
        type => 'string',
        optional => 1,
    },
    'running-machine' => {
        description => "The currently running machine type (if running).",
        type => 'string',
        optional => 1,
    },
    'running-qemu' => {
        description => "The currently running QEMU version (if running).",
        type => 'string',
        optional => 1,
    },
};
2685
# Per-PID cache of the last CPU accounting sample ({ time, used, cpu }).
# It persists between vmstatus() calls so that CPU usage can be computed as
# a delta between two samples.
my $last_proc_pid_stat;

# get VM status information
# This must be fast and should not block ($full == false)
# We only query KVM using QMP if $full == true (this can be slow)
#
# $opt_vmid - if set, only this VM is included in the result
# $full     - if true, additionally query the running VMs via QMP
#
# Returns a hash reference mapping each vmid to a hash of status values.
sub vmstatus {
    my ($opt_vmid, $full) = @_;

    my $res = {};

    my $storecfg = PVE::Storage::config();

    my $list = vzlist();
    my $defaults = load_defaults();

    my ($uptime) = PVE::ProcFSTools::read_proc_uptime(1);

    my $cpucount = $cpuinfo->{cpus} || 1;

    # first pass: values that come from the configuration, plus zeroed counters
    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);

        my $conf = PVE::QemuConfig->load_config($vmid);

        my $d = { vmid => int($vmid) };
        $d->{pid} = int($list->{$vmid}->{pid}) if $list->{$vmid}->{pid};

        # fixme: better status?
        $d->{status} = $list->{$vmid}->{pid} ? 'running' : 'stopped';

        my $size = PVE::QemuServer::Drive::bootdisk_size($storecfg, $conf);
        if (defined($size)) {
            $d->{disk} = 0; # no info available
            $d->{maxdisk} = $size;
        } else {
            $d->{disk} = 0;
            $d->{maxdisk} = 0;
        }

        # sockets * cores, capped at the host CPU count; an explicit 'vcpus'
        # setting overrides the computed value
        $d->{cpus} = ($conf->{sockets} || $defaults->{sockets})
            * ($conf->{cores} || $defaults->{cores});
        $d->{cpus} = $cpucount if $d->{cpus} > $cpucount;
        $d->{cpus} = $conf->{vcpus} if $conf->{vcpus};

        $d->{name} = $conf->{name} || "VM $vmid";
        # the configured value is multiplied by 1024*1024 to get bytes
        $d->{maxmem} = $conf->{memory} ? $conf->{memory}*(1024*1024)
            : $defaults->{memory}*(1024*1024);

        if ($conf->{balloon}) {
            $d->{balloon_min} = $conf->{balloon}*(1024*1024);
            $d->{shares} = defined($conf->{shares}) ? $conf->{shares}
                : $defaults->{shares};
        }

        $d->{uptime} = 0;
        $d->{cpu} = 0;
        $d->{mem} = 0;

        $d->{netout} = 0;
        $d->{netin} = 0;

        $d->{diskread} = 0;
        $d->{diskwrite} = 0;

        $d->{template} = 1 if PVE::QemuConfig->is_template($conf);

        $d->{serial} = 1 if conf_has_serial($conf);
        $d->{lock} = $conf->{lock} if $conf->{lock};
        $d->{tags} = $conf->{tags} if defined($conf->{tags});

        $res->{$vmid} = $d;
    }

    # network counters: devices named tap<vmid>i... are assigned to their VM
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    foreach my $dev (keys %$netdev) {
        next if $dev !~ m/^tap([1-9]\d*)i/;
        my $vmid = $1;
        my $d = $res->{$vmid};
        next if !$d;

        # the 'receive' counter of the tap device is accounted as the VM's
        # netout, 'transmit' as its netin
        $d->{netout} += $netdev->{$dev}->{receive};
        $d->{netin} += $netdev->{$dev}->{transmit};

        if ($full) {
            $d->{nics}->{$dev}->{netout} = int($netdev->{$dev}->{receive});
            $d->{nics}->{$dev}->{netin} = int($netdev->{$dev}->{transmit});
        }

    }

    my $ctime = gettimeofday;

    # second pass: uptime, memory and CPU usage from /proc for VMs with a pid
    foreach my $vmid (keys %$list) {

        my $d = $res->{$vmid};
        my $pid = $d->{pid};
        next if !$pid;

        my $pstat = PVE::ProcFSTools::read_proc_pid_stat($pid);
        next if !$pstat; # not running

        my $used = $pstat->{utime} + $pstat->{stime};

        $d->{uptime} = int(($uptime - $pstat->{starttime})/$cpuinfo->{user_hz});

        if ($pstat->{vsize}) {
            $d->{mem} = int(($pstat->{rss}/$pstat->{vsize})*$d->{maxmem});
        }

        my $old = $last_proc_pid_stat->{$pid};
        if (!$old) {
            # first sample for this pid: store it, usage is reported as 0
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($ctime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        # only recompute once $dtime exceeds the threshold, otherwise the
        # previously computed value is reported again
        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => $d->{cpu},
            };
        } else {
            $d->{cpu} = $old->{cpu};
        }
    }

    return $res if !$full;

    # everything below talks to the running VMs via QMP
    my $qmpclient = PVE::QMPClient->new();

    # 'query-balloon' response: memory values as reported by the guest
    my $ballooncb = sub {
        my ($vmid, $resp) = @_;

        my $info = $resp->{'return'};
        return if !$info->{max_mem};

        my $d = $res->{$vmid};

        # use memory assigned to VM
        $d->{maxmem} = $info->{max_mem};
        $d->{balloon} = $info->{actual};

        if (defined($info->{total_mem}) && defined($info->{free_mem})) {
            $d->{mem} = $info->{total_mem} - $info->{free_mem};
            $d->{freemem} = $info->{free_mem};
        }

        $d->{ballooninfo} = $info;
    };

    # 'query-blockstats' response: per-device stats plus read/write totals
    my $blockstatscb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} || [];
        my $totalrdbytes = 0;
        my $totalwrbytes = 0;

        for my $blockstat (@$data) {
            $totalrdbytes = $totalrdbytes + $blockstat->{stats}->{rd_bytes};
            $totalwrbytes = $totalwrbytes + $blockstat->{stats}->{wr_bytes};

            # strip the 'drive-' part so the device name can serve as key
            $blockstat->{device} =~ s/drive-//;
            $res->{$vmid}->{blockstat}->{$blockstat->{device}} = $blockstat->{stats};
        }
        $res->{$vmid}->{diskread} = $totalrdbytes;
        $res->{$vmid}->{diskwrite} = $totalwrbytes;
    };

    # 'query-machines' response: the machine type the VM currently runs with
    my $machinecb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} || [];

        $res->{$vmid}->{'running-machine'} =
            PVE::QemuServer::Machine::current_from_query_machines($data);
    };

    # 'query-version' response: "major.minor.micro", or 'unknown'
    my $versioncb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} // {};
        my $version = 'unknown';

        if (my $v = $data->{qemu}) {
            $version = $v->{major} . "." . $v->{minor} . "." . $v->{micro};
        }

        $res->{$vmid}->{'running-qemu'} = $version;
    };

    # 'query-status' response: queues the follow-up queries for the VM and
    # records the status reported by QMP
    my $statuscb = sub {
        my ($vmid, $resp) = @_;

        $qmpclient->queue_cmd($vmid, $blockstatscb, 'query-blockstats');
        $qmpclient->queue_cmd($vmid, $machinecb, 'query-machines');
        $qmpclient->queue_cmd($vmid, $versioncb, 'query-version');
        # this fails if the balloon driver is not loaded, so this must be
        # the last command (following commands are aborted if this fails).
        $qmpclient->queue_cmd($vmid, $ballooncb, 'query-balloon');

        my $status = 'unknown';
        if (!defined($status = $resp->{'return'}->{status})) {
            warn "unable to get VM status\n";
            return;
        }

        $res->{$vmid}->{qmpstatus} = $resp->{'return'}->{status};
    };

    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        next if !$res->{$vmid}->{pid}; # not running
        $qmpclient->queue_cmd($vmid, $statuscb, 'query-status');
    }

    $qmpclient->queue_execute(undef, 2);

    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        next if !$res->{$vmid}->{pid}; #not running

        # we can't use the $qmpclient since it might have already aborted on
        # 'query-balloon', but this might also fail for older versions...
        my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };
        $res->{$vmid}->{'proxmox-support'} = $qemu_support // {};
    }

    # VMs without a QMP status fall back to the process status
    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        $res->{$vmid}->{qmpstatus} = $res->{$vmid}->{status} if !$res->{$vmid}->{qmpstatus};
    }

    return $res;
}
2926
# Returns 1 if any of the serial ports serial0 .. serial<MAX-1> is configured
# with a true value, 0 otherwise.
sub conf_has_serial {
    my ($conf) = @_;

    my $port = 0;
    while ($port < $MAX_SERIAL_PORTS) {
        return 1 if $conf->{"serial$port"};
        $port++;
    }

    return 0;
}
2938
# Describe the audio device 'audio$id' of a config ($id defaults to 0).
#
# Returns nothing if the device is not configured. Otherwise returns a hash
# reference with the device model (dev), the device ID (dev_id), the backend
# driver (backend, 'spice' unless configured) and the backend ID (backend_id).
sub conf_has_audio {
    my ($conf, $id) = @_;

    $id = 0 if !defined($id);

    my $audio = $conf->{"audio$id"};
    return if !defined($audio);

    my $props = parse_property_string($audio_fmt, $audio);
    my $driver = defined($props->{driver}) ? $props->{driver} : 'spice';

    my $info = {
        dev => $props->{device},
        dev_id => "audiodev$id",
        backend => $driver,
        backend_id => "$driver-backend${id}",
    };

    return $info;
}
2956
# Build the command line arguments for an audio device.
#
# $audio           - device description as returned by conf_has_audio()
# $audiopciaddr    - PCI address suffix appended to the device string
# $machine_version - machine version; the 'audiodev' property is only added
#                    if min_version($machine_version, 4, 2) holds
#
# Returns an array reference of arguments; dies for unknown device models.
sub audio_devs {
    my ($audio, $audiopciaddr, $machine_version) = @_;

    my $id = $audio->{dev_id};
    my $audiodev = min_version($machine_version, 4, 2)
        ? ",audiodev=$audio->{backend_id}"
        : "";

    my @devs;
    if ($audio->{dev} eq 'AC97') {
        @devs = ('-device', "AC97,id=${id}${audiopciaddr}$audiodev");
    } elsif ($audio->{dev} =~ /intel\-hda$/) {
        # the HDA controller gets two codecs attached to its bus
        @devs = (
            '-device', "$audio->{dev},id=${id}${audiopciaddr}",
            '-device', "hda-micro,id=${id}-codec0,bus=${id}.0,cad=0$audiodev",
            '-device', "hda-duplex,id=${id}-codec1,bus=${id}.0,cad=1$audiodev",
        );
    } else {
        die "unkown audio device '$audio->{dev}', implement me!";
    }

    push @devs, '-audiodev', "$audio->{backend},id=$audio->{backend_id}";

    return \@devs;
}
2982
# Return the paths of the swtpm socket and PID file used for VM $vmid as a
# hash reference with the keys 'socket' and 'pid'.
sub get_tpm_paths {
    my ($vmid) = @_;

    my %paths = (
        socket => "/var/run/qemu-server/$vmid.swtpm",
        pid => "/var/run/qemu-server/$vmid.swtpm.pid",
    );

    return \%paths;
}
2990
# Append the arguments for the emulated TPM device to @$devices, if the
# config has a 'tpmstate0' entry. The device talks to the swtpm socket of
# the VM through a chardev.
sub add_tpm_device {
    my ($vmid, $devices, $conf) = @_;

    return if !$conf->{tpmstate0};

    my $socket_path = get_tpm_paths($vmid)->{socket};

    push @$devices,
        "-chardev", "socket,id=tpmchar,path=$socket_path",
        "-tpmdev", "emulator,id=tpmdev,chardev=tpmchar",
        "-device", "tpm-tis,tpmdev=tpmdev";
}
3002
# Start the swtpm daemon providing the emulated TPM of a VM.
#
# $storecfg  - storage configuration
# $vmid      - ID of the VM
# $tpmdrive  - 'tpmstate0' drive string; nothing is done if it is empty
# $migration - if true, swtpm_setup is skipped, as the state comes from the
#              remote side
#
# Returns the (untainted) PID of the swtpm daemon so it can be killed on
# error, or undef if the PID file does not contain a number.
# Dies if swtpm_setup or swtpm fail, or if the PID file does not show up.
sub start_swtpm {
    my ($storecfg, $vmid, $tpmdrive, $migration) = @_;

    return if !$tpmdrive;

    my $state;
    my $tpm = parse_drive("tpmstate0", $tpmdrive);
    my ($storeid, $volname) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
    if ($storeid) {
        $state = PVE::Storage::map_volume($storecfg, $tpm->{file});
    } else {
        $state = $tpm->{file};
    }

    my $paths = get_tpm_paths($vmid);

    my $is_tpm2 = defined($tpm->{version}) && $tpm->{version} eq 'v2.0';

    # during migration, we will get state from remote
    #
    if (!$migration) {
        # run swtpm_setup to create a new TPM state if it doesn't exist yet
        my $setup_cmd = [
            "swtpm_setup",
            "--tpmstate",
            "file://$state",
            "--createek",
            "--create-ek-cert",
            "--create-platform-cert",
            "--lock-nvram",
            "--config",
            "/etc/swtpm_setup.conf", # do not use XDG configs
            "--runas",
            "0", # force creation as root, error if not possible
            "--not-overwrite", # ignore existing state, do not modify
        ];

        push @$setup_cmd, "--tpm2" if $is_tpm2;
        # TPM 2.0 supports ECC crypto, use if possible
        push @$setup_cmd, "--ecc" if $is_tpm2;

        # use the line handed to the callback; $1 would only hold whatever
        # capture happens to be active in the caller at that moment
        run_command($setup_cmd, outfunc => sub {
            my ($line) = @_;
            print "swtpm_setup: $line\n";
        });
    }

    my $emulator_cmd = [
        "swtpm",
        "socket",
        "--tpmstate",
        "backend-uri=file://$state,mode=0600",
        "--ctrl",
        "type=unixio,path=$paths->{socket},mode=0600",
        "--pid",
        "file=$paths->{pid}",
        "--terminate", # terminate on QEMU disconnect
        "--daemon",
    ];
    push @$emulator_cmd, "--tpm2" if $is_tpm2;
    run_command($emulator_cmd, outfunc => sub {
        my ($line) = @_;
        print $line;
    });

    my $tries = 100; # swtpm may take a bit to start before daemonizing, wait up to 5s for pid
    while (! -e $paths->{pid}) {
        die "failed to start swtpm: pid file '$paths->{pid}' wasn't created.\n" if --$tries == 0;
        usleep(50_000);
    }

    # return untainted PID of swtpm daemon so it can be killed on error;
    # capture via list assignment, so that a failed match yields undef
    # instead of a stale $1
    my $first_line = file_read_firstline($paths->{pid});
    my ($pid) = ($first_line // '') =~ m/(\d+)/;
    return $pid;
}
3072
# Check whether a 'vga' setting selects a qxl display.
# Returns 0 if not; otherwise the digit following 'qxl' (2, 3 or 4), or 1
# for plain 'qxl'.
sub vga_conf_has_spice {
    my ($vga) = @_;

    my $vgaconf = parse_vga($vga);
    my $vgatype = $vgaconf->{type};

    if ($vgatype && $vgatype =~ m/^qxl([234])?$/) {
        return $1 || 1;
    }

    return 0;
}
3082
# True if $arch is the architecture of the host.
sub is_native($) {
    my ($arch) = @_;

    my $host_arch = get_host_arch();

    return $host_arch eq $arch;
}
3087
# Architecture of a VM: the configured 'arch', or the host architecture if
# none is configured.
sub get_vm_arch {
    my ($conf) = @_;

    my $arch = $conf->{arch};

    return defined($arch) ? $arch : get_host_arch();
}
3092
# Default machine type per architecture; get_vm_machine() falls back to this
# when neither the caller nor the config specifies a machine.
my $default_machines = {
    x86_64 => 'pc',
    aarch64 => 'virt',
};
3097
# Return the "major.minor" part of a QEMU/KVM version string.
#
# $kvmversion - version string; kvm_user_version() is used if undefined
#
# Returns undef if the version does not start with "<digits>.<digits>".
sub get_installed_machine_version {
    my ($kvmversion) = @_;

    $kvmversion = kvm_user_version() if !defined($kvmversion);

    # capture via list assignment: a failed match yields undef instead of a
    # stale $1 left over from an earlier, unrelated match
    my ($major_minor) = $kvmversion =~ m/^(\d+\.\d+)/;

    return $major_minor;
}
3104
# Pin an unversioned machine type ('pc', 'q35', 'virt' or none) to a version.
#
# $base_version is used if it is defined and can be run according to
# can_run_pve_machine_version(); otherwise the installed version is used.
# Unknown machine types are returned unchanged, with a warning.
sub windows_get_pinned_machine_version {
    my ($machine, $base_version, $kvmversion) = @_;

    my $base_usable = defined($base_version)
        && PVE::QemuServer::Machine::can_run_pve_machine_version($base_version, $kvmversion);

    my $pin_version = $base_usable
        ? $base_version
        : get_installed_machine_version($kvmversion);

    return "pc-i440fx-$pin_version" if !$machine || $machine eq 'pc';
    return "pc-q35-$pin_version" if $machine eq 'q35';
    return "virt-$pin_version" if $machine eq 'virt';

    warn "unknown machine type '$machine', not touching that!\n";

    return $machine;
}
3126
# Determine the machine type to use for a VM.
#
# $conf            - VM configuration
# $forcemachine    - takes precedence over the configured machine, if set
# $arch            - architecture used to pick the default machine
#                    ('x86_64' if undefined)
# $add_pve_version - if true, make sure the result carries a '+pveN' suffix
# $kvmversion      - QEMU version; kvm_user_version() is used if it is
#                    needed and undefined
sub get_vm_machine {
    my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_;

    my $machine = $forcemachine || $conf->{machine};

    my $is_unversioned = !$machine || $machine =~ m/^(?:pc|q35|virt)$/;
    if ($is_unversioned) {
        $kvmversion //= kvm_user_version();

        # we must pin Windows VMs without a specific version to 5.1, as 5.2 fixed a bug in ACPI
        # layout which confuses windows quite a bit and may result in various regressions..
        # see: https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg08484.html
        if (windows_version($conf->{ostype})) {
            $machine = windows_get_pinned_machine_version($machine, '5.1', $kvmversion);
        }

        $arch //= 'x86_64';
        $machine ||= $default_machines->{$arch};

        if ($add_pve_version) {
            my $pvever = PVE::QemuServer::Machine::get_pve_version($kvmversion);
            $machine .= "+pve$pvever";
        }
    }

    if ($add_pve_version && $machine !~ m/\+pve\d+?(?:\.pxe)?$/) {
        # a '.pxe' suffix has to stay at the very end
        my $pxe_suffix = '';
        if ($machine =~ m/^(.*?)\.pxe$/) {
            $machine = $1;
            $pxe_suffix = '.pxe';
        }

        # for version-pinned machines that do not include a pve-version (e.g.
        # pc-q35-4.1), we assume 0 to keep them stable in case we bump
        $machine .= '+pve0' . $pxe_suffix;
    }

    return $machine;
}
3161
# Look up the OVMF firmware images to use for a VM.
#
# $arch    - guest architecture, used as key into the file-level $OVMF table
# $efidisk - parsed efidisk0 drive (hash ref) or undef if none is configured;
#            only 'efitype' and 'pre-enrolled-keys' are read from it
#
# Returns the list stored for the selected image type; config_to_command
# unpacks it as ($ovmf_code, $ovmf_vars). Dies if $OVMF has no entry for the
# architecture or for the selected image type of that architecture.
sub get_ovmf_files($$) {
    my ($arch, $efidisk) = @_;

    my $types = $OVMF->{$arch}
        or die "no OVMF images known for architecture '$arch'\n";

    my $type = 'default';
    if (defined($efidisk->{efitype}) && $efidisk->{efitype} eq '4m') {
        $type = $efidisk->{'pre-enrolled-keys'} ? "4m-ms" : "4m";
    }

    # guard against an image type without an entry for this architecture, else
    # the dereference below dies with Perl's generic "Can't use an undefined
    # value as an ARRAY reference" error
    my $files = $types->{$type}
        or die "no OVMF images of type '$type' known for architecture '$arch'\n";

    return $files->@*;
}
3175
# QEMU system emulator binaries, keyed by guest architecture.
my $Arch2Qemu = {
    aarch64 => '/usr/bin/qemu-system-aarch64',
    x86_64 => '/usr/bin/qemu-system-x86_64',
};

# Returns the path of the binary to start for a guest of architecture $arch:
# '/usr/bin/kvm' if is_native($arch) holds, the matching entry of $Arch2Qemu
# otherwise. Dies for architectures without a known emulator binary.
sub get_command_for_arch($) {
    my ($arch) = @_;

    if (is_native($arch)) {
        return '/usr/bin/kvm';
    }

    my $emulator = $Arch2Qemu->{$arch};
    die "don't know how to emulate architecture '$arch'\n" if !$emulator;

    return $emulator;
}
3188
# Query the CPU flags QEMU supports on this host, separately for KVM and TCG.
#
# To get flags usable in a QEMU command line (-cpu element), first
# array_intersect the result of query_supported_ with query_understood_. This
# is necessary because:
#
# a) query_understood_ returns flags the host cannot use and
# b) query_supported_ (rather the QMP call) doesn't actually return CPU
#    flags, but CPU settings - with most of them being flags. Those settings
#    (and some flags, curiously) cannot be specified as a "-cpu" argument.
#
# query_supported_ needs to start up to 2 temporary VMs and is therefore rather
# expensive. If you need the value returned from this, you can get it much
# cheaper from pmxcfs using PVE::Cluster::get_node_kv('cpuflags-$accel') with
# $accel being 'kvm' or 'tcg'.
#
# pvestatd calls this function on startup and whenever the QEMU/KVM version
# changes, automatically populating pmxcfs.
#
# $arch defaults to the host architecture; dies for 'aarch64'.
#
# Returns: { kvm => [ flagX, flagY, ... ], tcg => [ flag1, flag2, ... ] }
# since kvm and tcg machines support different flags. The 'kvm' entry is only
# set if kvm_version() returns a defined value. An entry is undef if querying
# it failed, in which case a warning is emitted.
#
sub query_supported_cpu_flags {
    my ($arch) = @_;

    $arch //= get_host_arch();
    my $default_machine = $default_machines->{$arch};

    my $flags = {};

    # FIXME: Once this is merged, the code below should work for ARM as well:
    # https://lists.nongnu.org/archive/html/qemu-devel/2019-06/msg04947.html
    if ($arch eq "aarch64") {
        die "QEMU/KVM cannot detect CPU flags on ARM (aarch64)\n";
    }

    my $kvm_supported = defined(kvm_version());
    my $qemu_cmd = get_command_for_arch($arch);
    my $fakevmid = -1;
    my $pidfile = PVE::QemuServer::Helpers::pidfile_name($fakevmid);

    # Starts a temporary (frozen) VM with vmid -1, asks it via QMP for the
    # expanded 'host' CPU model and stops it again. Returns the sorted list of
    # enabled properties as array ref.
    my $run_and_query = sub {
        my ($use_kvm) = @_;

        my @qemu_args = (
            $qemu_cmd,
            '-machine', $default_machine,
            '-display', 'none',
            '-chardev', "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server=on,wait=off",
            '-mon', 'chardev=qmp,mode=control',
            '-pidfile', $pidfile,
            '-S', '-daemonize',
        );
        push @qemu_args, '-accel', 'tcg' if !$use_kvm;

        my $exit_code = run_command(\@qemu_args, noerr => 1, quiet => 0);
        die "QEMU flag querying VM exited with code " . $exit_code if $exit_code;

        my $enabled = {};
        eval {
            my $reply = mon_cmd(
                $fakevmid,
                'query-cpu-model-expansion',
                type => 'full',
                model => { name => 'host' }
            );

            my $props = $reply->{model}->{props};
            for my $name (keys %$props) {
                next if $props->{$name} ne '1';
                # QEMU returns some flags multiple times, with '_', '.' or '-'
                # (e.g. lahf_lm and lahf-lm; sse4.2, sse4-2 and sse4_2; ...).
                # We only keep those with underscores, to match /proc/cpuinfo
                $name =~ s/\.|-/_/g;
                $enabled->{$name} = 1;
            }
        };
        my $query_err = $@;

        # force stop with 10 sec timeout and 'nocheck', always stop, even if QMP failed
        vm_stop(undef, $fakevmid, 1, 1, 10, 0, 1);

        die $query_err if $query_err;

        return [ sort keys %$enabled ];
    };

    # We need to query QEMU twice, since KVM and TCG have different supported flags
    PVE::QemuConfig->lock_config($fakevmid, sub {
        my @passes = ([ 'tcg', 0 ]);
        push @passes, [ 'kvm', 1 ] if $kvm_supported;

        for my $pass (@passes) {
            my ($accel, $use_kvm) = @$pass;
            $flags->{$accel} = eval { $run_and_query->($use_kvm) };
            warn "warning: failed querying supported $accel flags: $@\n" if $@;
        }
    });

    return $flags;
}
3290
# Directory with the lists of CPU flags understood by QEMU; those files are
# written at 'pve-qemu' compile time.
my $understood_cpu_flag_dir = "/usr/share/kvm";

# Returns an array ref with all whitespace-separated flags listed in the
# recognized-CPUID-flags file of the host architecture. Dies if that file does
# not exist.
sub query_understood_cpu_flags {
    my $arch = get_host_arch();
    my $filepath = "$understood_cpu_flag_dir/recognized-CPUID-flags-$arch";

    if (! -e $filepath) {
        die "Cannot query understood QEMU CPU flags for architecture: $arch (file not found)\n";
    }

    # strip surrounding whitespace first, so that split cannot produce a
    # leading empty element
    (my $trimmed = file_get_contents($filepath)) =~ s/^\s+|\s+$//g;

    return [ split(/\s+/, $trimmed) ];
}
3306
# Returns the CPU weight of a VM: the configured 'cpuunits' if defined, else
# 100 if PVE::CGroup::cgroup_mode() reports 2 and 1024 otherwise.
my sub get_cpuunits {
    my ($conf) = @_;

    my $configured = $conf->{cpuunits};
    return $configured if defined($configured);

    return PVE::CGroup::cgroup_mode() == 2 ? 100 : 1024;
}
# Build the QEMU command line for a VM from its configuration.
#
# Parameters:
#   $storecfg     - storage configuration, handed to the PVE::Storage helpers
#   $vmid         - VM ID; used for '-id', socket/pidfile names, default name
#   $conf         - VM configuration hash (ostype, machine, bios, netN, ...)
#   $defaults     - fallback values; 'tablet', 'tdf' and 'scsihw' are read
#   $forcemachine - optional machine type taking precedence over the config
#   $forcecpu     - optional value passed verbatim as '-cpu' argument
#   $pbs_backing  - optional hash keyed by drive name; for drives with an entry
#                   a '-blockdev' generated by print_pbs_blockdev is added
#
# Returns ($cmd, $vollist, $spice_port) in list context and only $cmd in scalar
# context. $cmd is an array ref with the command-line words, $vollist an array
# ref with the storage volume IDs referenced by drives and vmstate, and
# $spice_port is only defined if a SPICE display is configured.
#
# Dies, among others, if the QEMU binary is older than 3.0, the machine type
# needs a newer QEMU or qemu-server than installed, KVM is requested but not
# available, the OVMF image is missing, a serial/parallel device node does not
# exist or more vCPUs than host CPUs are configured.
3311 sub config_to_command {
3312 my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu,
3313 $pbs_backing) = @_;
3314
3315 my $cmd = [];
3316 my $globalFlags = [];
3317 my $machineFlags = [];
3318 my $rtcFlags = [];
3319 my $devices = [];
3320 my $pciaddr = '';
3321 my $bridges = {};
3322 my $ostype = $conf->{ostype};
3323 my $winversion = windows_version($ostype);
3324 my $kvm = $conf->{kvm};
3325 my $nodename = nodename();
3326
3327 my $arch = get_vm_arch($conf);
3328 my $kvm_binary = get_command_for_arch($arch);
3329 my $kvmver = kvm_user_version($kvm_binary);
3330
3331 if (!$kvmver || $kvmver !~ m/^(\d+)\.(\d+)/ || $1 < 3) {
3332 $kvmver //= "undefined";
3333 die "Detected old QEMU binary ('$kvmver', at least 3.0 is required)\n";
3334 }
3335
3336 my $add_pve_version = min_version($kvmver, 4, 1);
3337
3338 my $machine_type = get_vm_machine($conf, $forcemachine, $arch, $add_pve_version);
3339 my $machine_version = extract_version($machine_type, $kvmver);
# without an explicit 'kvm' setting, KVM is only enabled if is_native($arch) holds
3340 $kvm //= 1 if is_native($arch);
3341
3342 $machine_version =~ m/(\d+)\.(\d+)/;
3343 my ($machine_major, $machine_minor) = ($1, $2);
3344
3345 if ($kvmver =~ m/^\d+\.\d+\.(\d+)/ && $1 >= 90) {
3346 warn "warning: Installed QEMU version ($kvmver) is a release candidate, ignoring version checks\n";
3347 } elsif (!min_version($kvmver, $machine_major, $machine_minor)) {
3348 die "Installed QEMU version '$kvmver' is too old to run machine type '$machine_type',"
3349 ." please upgrade node '$nodename'\n"
3350 } elsif (!PVE::QemuServer::Machine::can_run_pve_machine_version($machine_version, $kvmver)) {
3351 my $max_pve_version = PVE::QemuServer::Machine::get_pve_version($machine_version);
3352 die "Installed qemu-server (max feature level for $machine_major.$machine_minor is"
3353 ." pve$max_pve_version) is too old to run machine type '$machine_type', please upgrade"
3354 ." node '$nodename'\n";
3355 }
3356
3357 # if a specific +pve version is required for a feature, use $version_guard
3358 # instead of min_version to allow machines to be run with the minimum
3359 # required version
3360 my $required_pve_version = 0;
3361 my $version_guard = sub {
3362 my ($major, $minor, $pve) = @_;
3363 return 0 if !min_version($machine_version, $major, $minor, $pve);
3364 my $max_pve = PVE::QemuServer::Machine::get_pve_version("$major.$minor");
3365 return 1 if min_version($machine_version, $major, $minor, $max_pve+1);
3366 $required_pve_version = $pve if $pve && $pve > $required_pve_version;
3367 return 1;
3368 };
3369
3370 if ($kvm && !defined kvm_version()) {
3371 die "KVM virtualisation configured, but not available. Either disable in VM configuration"
3372 ." or enable in BIOS.\n";
3373 }
3374
3375 my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
3376 my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
3377 my $use_old_bios_files = undef;
3378 ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);
3379
3380 my $cpuunits = get_cpuunits($conf);
3381
3382 push @$cmd, $kvm_binary;
3383
3384 push @$cmd, '-id', $vmid;
3385
3386 my $vmname = $conf->{name} || "vm$vmid";
3387
3388 push @$cmd, '-name', $vmname;
3389
3390 push @$cmd, '-no-shutdown';
3391
3392 my $use_virtio = 0;
3393
3394 my $qmpsocket = PVE::QemuServer::Helpers::qmp_socket($vmid);
3395 push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server=on,wait=off";
3396 push @$cmd, '-mon', "chardev=qmp,mode=control";
3397
3398 if (min_version($machine_version, 2, 12)) {
3399 push @$cmd, '-chardev', "socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5";
3400 push @$cmd, '-mon', "chardev=qmp-event,mode=control";
3401 }
3402
3403 push @$cmd, '-pidfile' , PVE::QemuServer::Helpers::pidfile_name($vmid);
3404
3405 push @$cmd, '-daemonize';
3406
3407 if ($conf->{smbios1}) {
3408 my $smbios_conf = parse_smbios1($conf->{smbios1});
3409 if ($smbios_conf->{base64}) {
3410 # Do not pass base64 flag to qemu
3411 delete $smbios_conf->{base64};
3412 my $smbios_string = "";
3413 foreach my $key (keys %$smbios_conf) {
3414 my $value;
3415 if ($key eq "uuid") {
3416 $value = $smbios_conf->{uuid}
3417 } else {
3418 $value = decode_base64($smbios_conf->{$key});
3419 }
3420 # qemu accepts any binary data, only commas need escaping by double comma
3421 $value =~ s/,/,,/g;
3422 $smbios_string .= "," . $key . "=" . $value if $value;
3423 }
3424 push @$cmd, '-smbios', "type=1" . $smbios_string;
3425 } else {
3426 push @$cmd, '-smbios', "type=1,$conf->{smbios1}";
3427 }
3428 }
3429
3430 if ($conf->{bios} && $conf->{bios} eq 'ovmf') {
3431 my $d;
3432 if (my $efidisk = $conf->{efidisk0}) {
3433 $d = parse_drive('efidisk0', $efidisk);
3434 }
3435
3436 my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch, $d);
3437 die "uefi base image '$ovmf_code' not found\n" if ! -f $ovmf_code;
3438
3439 my ($path, $format);
3440 my $read_only_str = '';
3441 if ($d) {
3442 my ($storeid, $volname) = PVE::Storage::parse_volume_id($d->{file}, 1);
3443 $format = $d->{format};
3444 if ($storeid) {
3445 $path = PVE::Storage::path($storecfg, $d->{file});
3446 if (!defined($format)) {
3447 my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
3448 $format = qemu_img_format($scfg, $volname);
3449 }
3450 } else {
3451 $path = $d->{file};
3452 die "efidisk format must be specified\n"
3453 if !defined($format);
3454 }
3455
3456 $read_only_str = ',readonly=on' if drive_is_read_only($conf, $d);
3457 } else {
3458 warn "no efidisk configured! Using temporary efivars disk.\n";
3459 $path = "/tmp/$vmid-ovmf.fd";
3460 PVE::Tools::file_copy($ovmf_vars, $path, -s $ovmf_vars);
3461 $format = 'raw';
3462 }
3463
3464 my $size_str = "";
3465
3466 if ($format eq 'raw' && $version_guard->(4, 1, 2)) {
3467 $size_str = ",size=" . (-s $ovmf_vars);
3468 }
3469
3470 # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
3471 my $cache = "";
3472 if ($path =~ m/^rbd:/) {
3473 $cache = ',cache=writeback';
3474 $path .= ':rbd_cache_policy=writeback'; # avoid write-around, we *need* to cache writes too
3475 }
3476
3477 push @$cmd, '-drive', "if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code";
3478 push @$cmd, '-drive', "if=pflash,unit=1$cache,format=$format,id=drive-efidisk0$size_str,file=${path}${read_only_str}";
3479 }
3480
3481 if ($q35) { # tell QEMU to load q35 config early
3482 # we use different pcie-port hardware for qemu >= 4.0 for passthrough
3483 if (min_version($machine_version, 4, 0)) {
3484 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35-4.0.cfg';
3485 } else {
3486 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35.cfg';
3487 }
3488 }
3489
3490 if ($conf->{vmgenid}) {
3491 push @$devices, '-device', 'vmgenid,guid='.$conf->{vmgenid};
3492 }
3493
3494 # add usb controllers
3495 my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers(
3496 $conf, $bridges, $arch, $machine_type, $usbdesc->{format}, $MAX_USB_DEVICES);
3497 push @$devices, @usbcontrollers if @usbcontrollers;
3498 my $vga = parse_vga($conf->{vga});
3499
3500 my $qxlnum = vga_conf_has_spice($conf->{vga});
3501 $vga->{type} = 'qxl' if $qxlnum;
3502
3503 if (!$vga->{type}) {
3504 if ($arch eq 'aarch64') {
3505 $vga->{type} = 'virtio';
3506 } elsif (min_version($machine_version, 2, 9)) {
3507 $vga->{type} = (!$winversion || $winversion >= 6) ? 'std' : 'cirrus';
3508 } else {
3509 $vga->{type} = ($winversion >= 6) ? 'std' : 'cirrus';
3510 }
3511 }
3512
3513 # enable absolute mouse coordinates (needed by vnc)
3514 my $tablet;
3515 if (defined($conf->{tablet})) {
3516 $tablet = $conf->{tablet};
3517 } else {
3518 $tablet = $defaults->{tablet};
3519 $tablet = 0 if $qxlnum; # disable for spice because it is not needed
3520 $tablet = 0 if $vga->{type} =~ m/^serial\d+$/; # disable if we use serial terminal (no vga card)
3521 }
3522
3523 if ($tablet) {
3524 push @$devices, '-device', print_tabletdevice_full($conf, $arch) if $tablet;
3525 my $kbd = print_keyboarddevice_full($conf, $arch);
3526 push @$devices, '-device', $kbd if defined($kbd);
3527 }
3528
3529 my $bootorder = device_bootorder($conf);
3530
3531 # host pci device passthrough
3532 my ($kvm_off, $gpu_passthrough, $legacy_igd) = PVE::QemuServer::PCI::print_hostpci_devices(
3533 $vmid, $conf, $devices, $vga, $winversion, $q35, $bridges, $arch, $machine_type, $bootorder);
3534
3535 # usb devices
3536 my $usb_dev_features = {};
3537 $usb_dev_features->{spice_usb3} = 1 if min_version($machine_version, 4, 0);
3538
3539 my @usbdevices = PVE::QemuServer::USB::get_usb_devices(
3540 $conf, $usbdesc->{format}, $MAX_USB_DEVICES, $usb_dev_features, $bootorder);
3541 push @$devices, @usbdevices if @usbdevices;
3542
3543 # serial devices
3544 for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
3545 if (my $path = $conf->{"serial$i"}) {
3546 if ($path eq 'socket') {
3547 my $socket = "/var/run/qemu-server/${vmid}.serial$i";
3548 push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server=on,wait=off";
3549 # On aarch64, serial0 is the UART device. Qemu only allows
3550 # connecting UART devices via the '-serial' command line, as
3551 # the device has a fixed slot on the hardware...
3552 if ($arch eq 'aarch64' && $i == 0) {
3553 push @$devices, '-serial', "chardev:serial$i";
3554 } else {
3555 push @$devices, '-device', "isa-serial,chardev=serial$i";
3556 }
3557 } else {
3558 die "no such serial device\n" if ! -c $path;
3559 push @$devices, '-chardev', "tty,id=serial$i,path=$path";
3560 push @$devices, '-device', "isa-serial,chardev=serial$i";
3561 }
3562 }
3563 }
3564
3565 # parallel devices
3566 for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
3567 if (my $path = $conf->{"parallel$i"}) {
3568 die "no such parallel device\n" if ! -c $path;
3569 my $devtype = $path =~ m!^/dev/usb/lp! ? 'tty' : 'parport';
3570 push @$devices, '-chardev', "$devtype,id=parallel$i,path=$path";
3571 push @$devices, '-device', "isa-parallel,chardev=parallel$i";
3572 }
3573 }
3574
3575 if (min_version($machine_version, 4, 0) && (my $audio = conf_has_audio($conf))) {
3576 my $audiopciaddr = print_pci_addr("audio0", $bridges, $arch, $machine_type);
3577 my $audio_devs = audio_devs($audio, $audiopciaddr, $machine_version);
3578 push @$devices, @$audio_devs;
3579 }
3580
3581 add_tpm_device($vmid, $devices, $conf);
3582
3583 my $sockets = 1;
3584 $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer used
3585 $sockets = $conf->{sockets} if $conf->{sockets};
3586
3587 my $cores = $conf->{cores} || 1;
3588
3589 my $maxcpus = $sockets * $cores;
3590
3591 my $vcpus = $conf->{vcpus} ? $conf->{vcpus} : $maxcpus;
3592
3593 my $allowed_vcpus = $cpuinfo->{cpus};
3594
3595 die "MAX $allowed_vcpus vcpus allowed per VM on this node\n" if ($allowed_vcpus < $maxcpus);
3596
3597 if ($hotplug_features->{cpu} && min_version($machine_version, 2, 7)) {
3598 push @$cmd, '-smp', "1,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3599 for (my $i = 2; $i <= $vcpus; $i++) {
3600 my $cpustr = print_cpu_device($conf,$i);
3601 push @$cmd, '-device', $cpustr;
3602 }
3603
3604 } else {
3605
3606 push @$cmd, '-smp', "$vcpus,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3607 }
3608 push @$cmd, '-nodefaults';
3609
3610 push @$cmd, '-boot', "menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg";
3611
3612 push @$cmd, '-no-acpi' if defined($conf->{acpi}) && $conf->{acpi} == 0;
3613
3614 push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;
3615
3616 if ($vga->{type} && $vga->{type} !~ m/^serial\d+$/ && $vga->{type} ne 'none'){
3617 push @$devices, '-device', print_vga_device(
3618 $conf, $vga, $arch, $machine_version, $machine_type, undef, $qxlnum, $bridges);
3619 my $socket = PVE::QemuServer::Helpers::vnc_socket($vmid);
3620 push @$cmd, '-vnc', "unix:$socket,password=on";
3621 } else {
3622 push @$cmd, '-vga', 'none' if $vga->{type} eq 'none';
3623 push @$cmd, '-nographic';
3624 }
3625
3626 # time drift fix
3627 my $tdf = defined($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};
3628 my $useLocaltime = $conf->{localtime};
3629
3630 if ($winversion >= 5) { # windows
3631 $useLocaltime = 1 if !defined($conf->{localtime});
3632
3633 # use time drift fix when acpi is enabled
3634 if (!(defined($conf->{acpi}) && $conf->{acpi} == 0)) {
3635 $tdf = 1 if !defined($conf->{tdf});
3636 }
3637 }
3638
3639 if ($winversion >= 6) {
3640 push @$globalFlags, 'kvm-pit.lost_tick_policy=discard';
3641 push @$cmd, '-no-hpet';
3642 }
3643
3644 push @$rtcFlags, 'driftfix=slew' if $tdf;
3645
3646 if ($conf->{startdate} && $conf->{startdate} ne 'now') {
3647 push @$rtcFlags, "base=$conf->{startdate}";
3648 } elsif ($useLocaltime) {
3649 push @$rtcFlags, 'base=localtime';
3650 }
3651
3652 if ($forcecpu) {
3653 push @$cmd, '-cpu', $forcecpu;
3654 } else {
3655 push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
3656 }
3657
3658 PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd);
3659
3660 push @$cmd, '-S' if $conf->{freeze};
3661
3662 push @$cmd, '-k', $conf->{keyboard} if defined($conf->{keyboard});
3663
3664 my $guest_agent = parse_guest_agent($conf);
3665
3666 if ($guest_agent->{enabled}) {
3667 my $qgasocket = PVE::QemuServer::Helpers::qmp_socket($vmid, 1);
3668 push @$devices, '-chardev', "socket,path=$qgasocket,server=on,wait=off,id=qga0";
3669
3670 if (!$guest_agent->{type} || $guest_agent->{type} eq 'virtio') {
3671 my $pciaddr = print_pci_addr("qga0", $bridges, $arch, $machine_type);
3672 push @$devices, '-device', "virtio-serial,id=qga0$pciaddr";
3673 push @$devices, '-device', 'virtserialport,chardev=qga0,name=org.qemu.guest_agent.0';
3674 } elsif ($guest_agent->{type} eq 'isa') {
3675 push @$devices, '-device', "isa-serial,chardev=qga0";
3676 }
3677 }
3678
3679 my $rng = $conf->{rng0} ? parse_rng($conf->{rng0}) : undef;
3680 if ($rng && $version_guard->(4, 1, 2)) {
3681 check_rng_source($rng->{source});
3682
3683 my $max_bytes = $rng->{max_bytes} // $rng_fmt->{max_bytes}->{default};
3684 my $period = $rng->{period} // $rng_fmt->{period}->{default};
3685 my $limiter_str = "";
3686 if ($max_bytes) {
3687 $limiter_str = ",max-bytes=$max_bytes,period=$period";
3688 }
3689
3690 my $rng_addr = print_pci_addr("rng0", $bridges, $arch, $machine_type);
3691 push @$devices, '-object', "rng-random,filename=$rng->{source},id=rng0";
3692 push @$devices, '-device', "virtio-rng-pci,rng=rng0$limiter_str$rng_addr";
3693 }
3694
3695 my $spice_port;
3696
3697 if ($qxlnum) {
3698 if ($qxlnum > 1) {
3699 if ($winversion){
3700 for (my $i = 1; $i < $qxlnum; $i++){
3701 push @$devices, '-device', print_vga_device(
3702 $conf, $vga, $arch, $machine_version, $machine_type, $i, $qxlnum, $bridges);
3703 }
3704 } else {
3705 # assume other OS works like Linux
3706 my ($ram, $vram) = ("134217728", "67108864");
3707 if ($vga->{memory}) {
3708 $ram = PVE::Tools::convert_size($qxlnum*4*$vga->{memory}, 'mb' => 'b');
3709 $vram = PVE::Tools::convert_size($qxlnum*2*$vga->{memory}, 'mb' => 'b');
3710 }
3711 push @$cmd, '-global', "qxl-vga.ram_size=$ram";
3712 push @$cmd, '-global', "qxl-vga.vram_size=$vram";
3713 }
3714 }
3715
3716 my $pciaddr = print_pci_addr("spice", $bridges, $arch, $machine_type);
3717
3718 my $pfamily = PVE::Tools::get_host_address_family($nodename);
3719 my @nodeaddrs = PVE::Tools::getaddrinfo_all('localhost', family => $pfamily);
3720 die "failed to get an ip address of type $pfamily for 'localhost'\n" if !@nodeaddrs;
3721
3722 push @$devices, '-device', "virtio-serial,id=spice$pciaddr";
3723 push @$devices, '-chardev', "spicevmc,id=vdagent,name=vdagent";
3724 push @$devices, '-device', "virtserialport,chardev=vdagent,name=com.redhat.spice.0";
3725
3726 my $localhost = PVE::Network::addr_to_ip($nodeaddrs[0]->{addr});
3727 $spice_port = PVE::Tools::next_spice_port($pfamily, $localhost);
3728
3729 my $spice_enhancement_str = $conf->{spice_enhancements} // '';
3730 my $spice_enhancement = parse_property_string($spice_enhancements_fmt, $spice_enhancement_str);
3731 if ($spice_enhancement->{foldersharing}) {
3732 push @$devices, '-chardev', "spiceport,id=foldershare,name=org.spice-space.webdav.0";
3733 push @$devices, '-device', "virtserialport,chardev=foldershare,name=org.spice-space.webdav.0";
3734 }
3735
3736 my $spice_opts = "tls-port=${spice_port},addr=$localhost,tls-ciphers=HIGH,seamless-migration=on";
3737 $spice_opts .= ",streaming-video=$spice_enhancement->{videostreaming}"
3738 if $spice_enhancement->{videostreaming};
3739
3740 push @$devices, '-spice', "$spice_opts";
3741 }
3742
3743 # enable balloon by default, unless explicitly disabled
3744 if (!defined($conf->{balloon}) || $conf->{balloon}) {
3745 $pciaddr = print_pci_addr("balloon0", $bridges, $arch, $machine_type);
3746 push @$devices, '-device', "virtio-balloon-pci,id=balloon0$pciaddr";
3747 }
3748
3749 if ($conf->{watchdog}) {
3750 my $wdopts = parse_watchdog($conf->{watchdog});
3751 $pciaddr = print_pci_addr("watchdog", $bridges, $arch, $machine_type);
3752 my $watchdog = $wdopts->{model} || 'i6300esb';
3753 push @$devices, '-device', "$watchdog$pciaddr";
3754 push @$devices, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
3755 }
3756
3757 my $vollist = [];
3758 my $scsicontroller = {};
3759 my $ahcicontroller = {};
3760 my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : $defaults->{scsihw};
3761
3762 # Add iscsi initiator name if available
3763 if (my $initiator = get_initiator_name()) {
3764 push @$devices, '-iscsi', "initiator-name=$initiator";
3765 }
3766
3767 PVE::QemuConfig->foreach_volume($conf, sub {
3768 my ($ds, $drive) = @_;
3769
3770 if (PVE::Storage::parse_volume_id($drive->{file}, 1)) {
3771 check_volume_storage_type($storecfg, $drive->{file});
3772 push @$vollist, $drive->{file};
3773 }
3774
3775 # ignore efidisk here, already added in bios/fw handling code above
3776 return if $drive->{interface} eq 'efidisk';
3777 # similar for TPM
3778 return if $drive->{interface} eq 'tpmstate';
3779
3780 $use_virtio = 1 if $ds =~ m/^virtio/;
3781
3782 $drive->{bootindex} = $bootorder->{$ds} if $bootorder->{$ds};
3783
3784 if ($drive->{interface} eq 'virtio'){
3785 push @$cmd, '-object', "iothread,id=iothread-$ds" if $drive->{iothread};
3786 }
3787
3788 if ($drive->{interface} eq 'scsi') {
3789
3790 my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
3791
3792 die "scsi$drive->{index}: machine version 4.1~pve2 or higher is required to use more than 14 SCSI disks\n"
3793 if $drive->{index} > 13 && !&$version_guard(4, 1, 2);
3794
3795 $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges, $arch, $machine_type);
3796 my $scsihw_type = $scsihw =~ m/^virtio-scsi-single/ ? "virtio-scsi-pci" : $scsihw;
3797
3798 my $iothread = '';
3799 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{iothread}){
3800 $iothread .= ",iothread=iothread-$controller_prefix$controller";
3801 push @$cmd, '-object', "iothread,id=iothread-$controller_prefix$controller";
3802 } elsif ($drive->{iothread}) {
3803 warn "iothread is only valid with virtio disk or virtio-scsi-single controller, ignoring\n";
3804 }
3805
3806 my $queues = '';
3807 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{queues}){
3808 $queues = ",num_queues=$drive->{queues}";
3809 }
3810
3811 push @$devices, '-device', "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues"
3812 if !$scsicontroller->{$controller};
3813 $scsicontroller->{$controller}=1;
3814 }
3815
3816 if ($drive->{interface} eq 'sata') {
3817 my $controller = int($drive->{index} / $PVE::QemuServer::Drive::MAX_SATA_DISKS);
3818 $pciaddr = print_pci_addr("ahci$controller", $bridges, $arch, $machine_type);
3819 push @$devices, '-device', "ahci,id=ahci$controller,multifunction=on$pciaddr"
3820 if !$ahcicontroller->{$controller};
3821 $ahcicontroller->{$controller}=1;
3822 }
3823
3824 my $pbs_conf = $pbs_backing->{$ds};
3825 my $pbs_name = undef;
3826 if ($pbs_conf) {
3827 $pbs_name = "drive-$ds-pbs";
3828 push @$devices, '-blockdev', print_pbs_blockdev($pbs_conf, $pbs_name);
3829 }
3830
3831 my $drive_cmd = print_drive_commandline_full(
3832 $storecfg, $vmid, $drive, $pbs_name, min_version($kvmver, 6, 0));
3833
3834 # extra protection for templates, but SATA and IDE don't support it..
3835 $drive_cmd .= ',readonly=on' if drive_is_read_only($conf, $drive);
3836
3837 push @$devices, '-drive',$drive_cmd;
3838 push @$devices, '-device', print_drivedevice_full(
3839 $storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type);
3840 });
3841
3842 for (my $i = 0; $i < $MAX_NETS; $i++) {
3843 my $netname = "net$i";
3844
3845 next if !$conf->{$netname};
3846 my $d = parse_net($conf->{$netname});
3847 next if !$d;
3848
3849 $use_virtio = 1 if $d->{model} eq 'virtio';
3850
3851 $d->{bootindex} = $bootorder->{$netname} if $bootorder->{$netname};
3852
3853 my $netdevfull = print_netdev_full($vmid, $conf, $arch, $d, $netname);
3854 push @$devices, '-netdev', $netdevfull;
3855
3856 my $netdevicefull = print_netdevice_full(
3857 $vmid, $conf, $d, $netname, $bridges, $use_old_bios_files, $arch, $machine_type);
3858
3859 push @$devices, '-device', $netdevicefull;
3860 }
3861
3862 if ($conf->{ivshmem}) {
3863 my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
3864
3865 my $bus;
3866 if ($q35) {
3867 $bus = print_pcie_addr("ivshmem");
3868 } else {
3869 $bus = print_pci_addr("ivshmem", $bridges, $arch, $machine_type);
3870 }
3871
3872 my $ivshmem_name = $ivshmem->{name} // $vmid;
3873 my $path = '/dev/shm/pve-shm-' . $ivshmem_name;
3874
3875 push @$devices, '-device', "ivshmem-plain,memdev=ivshmem$bus,";
3876 push @$devices, '-object', "memory-backend-file,id=ivshmem,share=on,mem-path=$path"
3877 .",size=$ivshmem->{size}M";
3878 }
3879
3880 # pci.4 is nested in pci.1
3881 $bridges->{1} = 1 if $bridges->{4};
3882
3883 if (!$q35) {
3884 # add pci bridges
3885 if (min_version($machine_version, 2, 3)) {
3886 $bridges->{1} = 1;
3887 $bridges->{2} = 1;
3888 }
3889
3890 $bridges->{3} = 1 if $scsihw =~ m/^virtio-scsi-single/;
3891
3892 }
3893
3894 for my $k (sort {$b cmp $a} keys %$bridges) {
3895 next if $q35 && $k < 4; # q35.cfg already includes bridges up to 3
3896
3897 my $k_name = $k;
3898 if ($k == 2 && $legacy_igd) {
3899 $k_name = "$k-igd";
3900 }
3901 $pciaddr = print_pci_addr("pci.$k_name", undef, $arch, $machine_type);
3902
3903 my $devstr = "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr";
3904 if ($q35) {
3905 # add after -readconfig pve-q35.cfg
# (for q35 the first two elements of @$devices are '-readconfig' and its file, hence index 2)
3906 splice @$devices, 2, 0, '-device', $devstr;
3907 } else {
3908 unshift @$devices, '-device', $devstr if $k > 0;
3909 }
3910 }
3911
3912 if (!$kvm) {
3913 push @$machineFlags, 'accel=tcg';
3914 }
3915
# replace the +pve suffix with the highest level recorded by $version_guard as
# actually required, see the comment above its definition
3916 my $machine_type_min = $machine_type;
3917 if ($add_pve_version) {
3918 $machine_type_min =~ s/\+pve\d+$//;
3919 $machine_type_min .= "+pve$required_pve_version";
3920 }
3921 push @$machineFlags, "type=${machine_type_min}";
3922
3923 push @$cmd, @$devices;
3924 push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
3925 push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
3926 push @$cmd, '-global', join(',', @$globalFlags) if scalar(@$globalFlags);
3927
3928 if (my $vmstate = $conf->{vmstate}) {
3929 my $statepath = PVE::Storage::path($storecfg, $vmstate);
3930 push @$vollist, $vmstate;
3931 push @$cmd, '-loadstate', $statepath;
3932 print "activating and using '$vmstate' as vmstate\n";
3933 }
3934
3935 if (PVE::QemuConfig->is_template($conf)) {
3936 # needed to workaround base volumes being read-only
3937 push @$cmd, '-snapshot';
3938 }
3939
3940 # add custom args
3941 if ($conf->{args}) {
3942 my $aa = PVE::Tools::split_args($conf->{args});
3943 push @$cmd, @$aa;
3944 }
3945
3946 return wantarray ? ($cmd, $vollist, $spice_port) : $cmd;
3947 }
3948
# Sanity-check the source file of a VirtIO RNG device before a VM gets
# started. Dies with a descriptive message if the source does not exist, or if
# '/dev/hwrng' is requested while the kernel reports 'none' as current
# hardware RNG.
sub check_rng_source {
    my ($source) = @_;

    # mostly relevant for /dev/hwrng, but doesn't hurt to check others too
    if (! -e $source) {
        die "cannot create VirtIO RNG device: source file '$source' doesn't exist\n";
    }

    my $rng_current = '/sys/devices/virtual/misc/hw_random/rng_current';
    my $wants_hwrng = $source eq '/dev/hwrng';

    if ($wants_hwrng && file_read_firstline($rng_current) eq 'none') {
        # Needs to abort, otherwise QEMU crashes on first rng access. Note that rng_current cannot
        # be changed to 'none' manually, so once the VM is past this point, it's no longer an issue.
        die "Cannot start VM with passed-through RNG device: '/dev/hwrng' exists, but "
            . "'$rng_current' is set to 'none'. "
            . "Ensure that a compatible hardware-RNG is attached to the host.\n";
    }
}
3965
# Ask QEMU (QMP 'query-spice') for the port of the VM's SPICE server. The
# 'tls-port' is preferred over the plain 'port'; dies if neither is set.
sub spice_port {
    my ($vmid) = @_;

    my $info = mon_cmd($vmid, 'query-spice');

    for my $key ('tls-port', 'port') {
        return $info->{$key} if $info->{$key};
    }

    die "no spice port\n";
}
3973
# Collect the IDs of the devices currently present in a running VM by asking
# QEMU via QMP.
#
# Returns a hash ref keyed by device ID with true values:
#   - PCI devices: the 'qdev_id' of every device reported by 'query-pci',
#     descending into PCI bridges; for a bridge the value is 1 plus the number
#     of devices directly behind it
#   - block devices reported by 'query-block' whose name starts with 'drive-'
#     (stored without that prefix)
#   - 'tablet' if 'query-mice' lists a 'QEMU HID Tablet'
#   - peripherals named usbN found in 'qom-list' of /machine/peripheral
sub vm_devices_list {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, 'query-pci');
    my $devices_to_check = [];
    my $devices = {};
    foreach my $pcibus (@$res) {
        push @$devices_to_check, @{ $pcibus->{devices} // [] };
    }

    # walk the PCI topology level by level, bridges add their children to the
    # list for the next round
    while (@$devices_to_check) {
        my $to_check = [];
        for my $d (@$devices_to_check) {
            $devices->{$d->{'qdev_id'}} = 1 if $d->{'qdev_id'};
            next if !$d->{'pci_bridge'};

            # 'devices' is an optional member of QMP's PciBridgeInfo; treat a
            # missing list as empty instead of dying on the dereference
            my $children = $d->{'pci_bridge'}->{devices} // [];

            $devices->{$d->{'qdev_id'}} += scalar(@$children);
            push @$to_check, @$children;
        }
        $devices_to_check = $to_check;
    }

    my $resblock = mon_cmd($vmid, 'query-block');
    foreach my $block (@$resblock) {
        if ($block->{device} =~ m/^drive-(\S+)/) {
            $devices->{$1} = 1;
        }
    }

    my $resmice = mon_cmd($vmid, 'query-mice');
    foreach my $mice (@$resmice) {
        if ($mice->{name} eq 'QEMU HID Tablet') {
            $devices->{tablet} = 1;
            last;
        }
    }

    # for usb devices there is no query-usb
    # but we can iterate over the entries in
    # qom-list path=/machine/peripheral
    my $resperipheral = mon_cmd($vmid, 'qom-list', path => '/machine/peripheral');
    foreach my $per (@$resperipheral) {
        if ($per->{name} =~ m/^usb\d+$/) {
            $devices->{$per->{name}} = 1;
        }
    }

    return $devices;
}
4023
# Hotplug the device $deviceid into the running VM $vmid.
#
# Parameters:
#   $storecfg     - storage configuration, passed on to the drive helpers
#   $conf         - VM configuration hash
#   $vmid         - id of the running VM
#   $deviceid     - device to plug: 'tablet', 'keyboard', 'virtioN',
#                   'virtioscsiN'/'scsihwN', 'scsiN', 'netN' or (non-q35
#                   only) 'pci.N'
#   $device       - parsed drive/net description; only read by the branches
#                   that need it
#   $arch, $machine_type - passed on to the print_* command line helpers
#
# Returns 1 on success or if the device is already present. For 'netN' it
# returns an empty value if qemu_netdevadd() reports failure. Dies on any
# other error, including unsupported device ids and all 'usbN' ids.
#
# For drives and NICs the backend (drive / netdev) is created before the
# frontend device. If adding or verifying the frontend fails, the backend
# is removed again so that no orphaned backend is left behind in QEMU.
sub vm_deviceplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    my $devices_list = vm_devices_list($vmid);
    return 1 if defined($devices_list->{$deviceid});

    # add PCI bridge if we need it for the device
    qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid, $arch, $machine_type);

    if ($deviceid eq 'tablet') {

        qemu_deviceadd($vmid, print_tabletdevice_full($conf, $arch));

    } elsif ($deviceid eq 'keyboard') {

        qemu_deviceadd($vmid, print_keyboarddevice_full($conf, $arch));

    } elsif ($deviceid =~ m/^usb(\d+)$/) {

        die "usb hotplug currently not reliable\n";
        # since we can't reliably hot unplug all added usb devices and usb
        # passthrough breaks live migration we disable usb hotplugging for now
        #qemu_deviceadd($vmid, PVE::QemuServer::USB::print_usbdevice_full($conf, $deviceid, $device));

    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {

        qemu_iothread_add($vmid, $deviceid, $device);

        qemu_driveadd($storecfg, $vmid, $device);
        my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);

        # device_add itself is inside the eval so that a failing device_add
        # also triggers the drive cleanup (same as the scsi branch below)
        eval {
            qemu_deviceadd($vmid, $devicefull);
            qemu_deviceaddverify($vmid, $deviceid);
        };
        if (my $err = $@) {
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }

    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {

        my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : "lsi";
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $scsihw_type = $scsihw eq 'virtio-scsi-single' ? "virtio-scsi-pci" : $scsihw;

        my $devicefull = "$scsihw_type,id=$deviceid$pciaddr";

        if ($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{iothread}) {
            qemu_iothread_add($vmid, $deviceid, $device);
            $devicefull .= ",iothread=iothread-$deviceid";
        }

        if ($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{queues}) {
            $devicefull .= ",num_queues=$device->{queues}";
        }

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);

    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {

        qemu_findorcreatescsihw($storecfg, $conf, $vmid, $device, $arch, $machine_type);
        qemu_driveadd($storecfg, $vmid, $device);

        my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);
        eval { qemu_deviceadd($vmid, $devicefull); };
        if (my $err = $@) {
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }

    } elsif ($deviceid =~ m/^(net)(\d+)$/) {

        return if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid);

        # NOTE: intentionally shadows the $machine_type parameter for this branch
        my $machine_type = PVE::QemuServer::Machine::qemu_machine_pxe($vmid, $conf);
        my $use_old_bios_files = undef;
        ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);

        my $netdevicefull = print_netdevice_full(
            $vmid, $conf, $device, $deviceid, undef, $use_old_bios_files, $arch, $machine_type);

        # device_add itself is inside the eval so that a failing device_add
        # also removes the netdev backend that was just created
        eval {
            qemu_deviceadd($vmid, $netdevicefull);
            qemu_deviceaddverify($vmid, $deviceid);
            qemu_set_link_status($vmid, $deviceid, !$device->{link_down});
        };
        if (my $err = $@) {
            eval { qemu_netdevdel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }

    } elsif (!$q35 && $deviceid =~ m/^(pci\.)(\d+)$/) {

        my $bridgeid = $2;
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $devicefull = "pci-bridge,id=pci.$bridgeid,chassis_nr=$bridgeid$pciaddr";

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);

    } else {
        die "can't hotplug device '$deviceid'\n";
    }

    return 1;
}
4135
# Hot-unplug the device $deviceid from the running VM $vmid.
#
# Returns 1 when the device was removed or was not present to begin with.
# Dies when asked to unplug a boot disk, a 'usbN' device or an unsupported
# device id.
#
# fixme: this should raise exceptions on error!
sub vm_deviceunplug {
    my ($vmid, $conf, $deviceid) = @_;

    my $present = vm_devices_list($vmid);
    return 1 if !defined($present->{$deviceid});

    my $bootdisks = PVE::QemuServer::Drive::get_bootdisks($conf);
    die "can't unplug bootdisk '$deviceid'\n" if grep {$_ eq $deviceid} @$bootdisks;

    if ($deviceid eq 'tablet' || $deviceid eq 'keyboard') {
        qemu_devicedel($vmid, $deviceid);
    } elsif ($deviceid =~ m/^usb\d+$/) {
        die "usb hotplug currently not reliable\n";
        # when unplugging usb devices this way, there may be remaining usb
        # controllers/hubs so we disable it for now
        #qemu_devicedel($vmid, $deviceid);
        #qemu_devicedelverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^virtio\d+$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        # frontend first, then the backing drive, then its iothread
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_iothread_del($vmid, $deviceid, $drive);
    } elsif ($deviceid =~ m/^(?:virtioscsi|scsihw)\d+$/) {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^scsi\d+$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        qemu_devicedel($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_deletescsihw($conf, $vmid, $deviceid);

        my $single_controller = $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
        qemu_iothread_del($vmid, "virtioscsi$drive->{index}", $drive) if $single_controller;
    } elsif ($deviceid =~ m/^net\d+$/) {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_netdevdel($vmid, $deviceid);
    } else {
        die "can't unplug device '$deviceid'\n";
    }

    return 1;
}
4183
# Issue a 'device_add' monitor command for a '-device' style string.
#
# $devicefull has the form '<driver>,key=value,...'. It is prefixed with
# 'driver=' and split on '=' and ',' into the option hash handed to the
# monitor, so individual values must not contain either character.
sub qemu_deviceadd {
    my ($vmid, $devicefull) = @_;

    my %options = split(/[=,]/, "driver=" . $devicefull);

    return mon_cmd($vmid, "device_add", %options);
}
4192
# Issue a 'device_del' monitor command for the device with id $deviceid and
# return whatever the monitor call returned.
sub qemu_devicedel {
    my ($vmid, $deviceid) = @_;

    my $result = mon_cmd($vmid, "device_del", id => $deviceid);

    return $result;
}
4198
# Create the 'iothread-<deviceid>' object if $device requests an iothread
# and the object is not already listed by vm_iothreads_list().
sub qemu_iothread_add {
    my ($vmid, $deviceid, $device) = @_;

    if ($device->{iothread}) {
        my $object_id = "iothread-$deviceid";
        my $existing = vm_iothreads_list($vmid);
        qemu_objectadd($vmid, $object_id, "iothread") if !$existing->{$object_id};
    }
}
4207
# Remove the 'iothread-<deviceid>' object if $device used an iothread and
# the object is currently listed by vm_iothreads_list().
sub qemu_iothread_del {
    my ($vmid, $deviceid, $device) = @_;

    if ($device->{iothread}) {
        my $object_id = "iothread-$deviceid";
        my $existing = vm_iothreads_list($vmid);
        qemu_objectdel($vmid, $object_id) if $existing->{$object_id};
    }
}
4216
# Create a QOM object of type $qomtype with id $objectid via 'object-add'.
# Always returns 1; errors surface as exceptions from mon_cmd().
sub qemu_objectadd {
    my ($vmid, $objectid, $qomtype) = @_;

    my %args = (
        id => $objectid,
        "qom-type" => $qomtype,
    );
    mon_cmd($vmid, "object-add", %args);

    return 1;
}
4224
# Delete the QOM object with id $objectid via 'object-del'.
# Always returns 1; errors surface as exceptions from mon_cmd().
sub qemu_objectdel {
    my ($vmid, $objectid) = @_;

    my %args = (id => $objectid);
    mon_cmd($vmid, "object-del", %args);

    return 1;
}
4232
# Add the drive backend for $device to the running VM using the HMP
# 'drive_add' command.
#
# The drive string is generated with io_uring enabled only when the running
# QEMU is at least version 6.0. Returns 1 on success, dies with the monitor
# output otherwise.
sub qemu_driveadd {
    my ($storecfg, $vmid, $device) = @_;

    my $running_version = get_running_qemu_version($vmid);
    my $use_io_uring = min_version($running_version, 6, 0);

    my $drive = print_drive_commandline_full($storecfg, $vmid, $device, undef, $use_io_uring);
    # backslashes have to be doubled inside the quoted HMP argument
    $drive =~ s/\\/\\\\/g;

    my $ret = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_add auto \"$drive\"");

    # If the command succeeds qemu prints: "OK"
    die "adding drive failed: $ret\n" if $ret !~ m/OK/s;

    return 1;
}
4247
# Remove the drive backend 'drive-<deviceid>' using the HMP 'drive_del'
# command.
#
# Returns 1 when the monitor prints nothing (ignoring leading whitespace) or
# reports that the device was not found; dies with the monitor output
# otherwise.
sub qemu_drivedel {
    my ($vmid, $deviceid) = @_;

    my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_del drive-$deviceid");
    $output =~ s/^\s+//;

    # NB: device not found errors mean the drive was auto-deleted and we ignore the error
    my $already_gone = $output =~ m/Device \'.*?\' not found/s;

    return 1 if $output eq "" || $already_gone;

    die "deleting drive $deviceid failed : $output\n";
}
4261
# Wait until $deviceid shows up in vm_devices_list().
#
# The list is polled up to six times with one second of sleep after each
# unsuccessful check. Returns 1 once the device is visible, dies otherwise.
sub qemu_deviceaddverify {
    my ($vmid, $deviceid) = @_;

    foreach my $attempt (1 .. 6) {
        return 1 if defined(vm_devices_list($vmid)->{$deviceid});
        sleep 1;
    }

    die "error on hotplug device '$deviceid'\n";
}
4273
4274
# Wait until $deviceid has disappeared from vm_devices_list().
#
# need to verify that the device is correctly removed as device_del
# is async and empty return is not reliable
#
# The list is polled up to six times with one second of sleep after each
# unsuccessful check. Returns 1 once the device is gone, dies otherwise.
sub qemu_devicedelverify {
    my ($vmid, $deviceid) = @_;

    foreach my $attempt (1 .. 6) {
        return 1 if !defined(vm_devices_list($vmid)->{$deviceid});
        sleep 1;
    }

    die "error on hot-unplugging device '$deviceid'\n";
}
4289
# Make sure the SCSI controller that $device belongs to exists in the
# running VM, hotplugging it if it is missing. Always returns 1.
sub qemu_findorcreatescsihw {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    # only the controller number and id prefix are needed here
    my (undef, $controller, $controller_prefix) = scsihw_infos($conf, $device);
    my $scsihwid = "$controller_prefix$controller";

    my $present = vm_devices_list($vmid);
    vm_deviceplug($storecfg, $conf, $vmid, $scsihwid, $device, $arch, $machine_type)
        if !defined($present->{$scsihwid});

    return 1;
}
4304
# Remove the SCSI controller that served drive $opt once it is no longer
# needed. Always returns 1.
#
# With 'virtio-scsi-single' each drive has its own 'virtioscsi<index>'
# controller, which is unplugged directly. Otherwise the shared
# 'scsihw<controller>' is only unplugged if none of the devices still
# present is a scsi drive whose index is below the computed limit.
sub qemu_deletescsihw {
    my ($conf, $vmid, $opt) = @_;

    my $device = parse_drive($opt, $conf->{$opt});

    my $scsihw = $conf->{scsihw};
    if ($scsihw && ($scsihw eq 'virtio-scsi-single')) {
        vm_deviceunplug($vmid, $conf, "virtioscsi$device->{index}");
        return 1;
    }

    my ($maxdev, $controller) = scsihw_infos($conf, $device);
    my $index_limit = ($maxdev - 1) * ($controller + 1);

    my $present = vm_devices_list($vmid);
    foreach my $devid (keys %{$present}) {
        next if !is_valid_drivename($devid);

        my $drive = parse_drive($devid, $conf->{$devid});
        # controller still in use - keep it
        return 1 if $drive->{interface} eq 'scsi' && $drive->{index} < $index_limit;
    }

    vm_deviceunplug($vmid, $conf, "scsihw$controller");

    return 1;
}
4333
# Hotplug the PCI bridge that $device sits behind, if it needs one and the
# bridge is not present yet. Always returns 1.
#
# print_pci_addr() records the bridge it used for $device in the $bridges
# hash passed to it; a missing entry or a bridge id below 1 means no bridge
# has to be added.
sub qemu_add_pci_bridge {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my $bridges = {};

    my $bridgeid;

    print_pci_addr($device, $bridges, $arch, $machine_type);

    while (my ($k, $v) = each %$bridges) {
        $bridgeid = $k;
    }
    return 1 if !defined($bridgeid) || $bridgeid < 1;

    my $bridge = "pci.$bridgeid";
    my $devices_list = vm_devices_list($vmid);

    if (!defined($devices_list->{$bridge})) {
        # vm_deviceplug() expects ($storecfg, $conf, $vmid, $deviceid, $device,
        # $arch, $machine_type). The bridge has no device description, so pass
        # an empty one to keep $arch and $machine_type in their proper slots.
        vm_deviceplug($storecfg, $conf, $vmid, $bridge, {}, $arch, $machine_type);
    }

    return 1;
}
4357
# Set the link state of network device $device via the 'set_link' monitor
# command; a true $up means link up, anything else link down.
sub qemu_set_link_status {
    my ($vmid, $device, $up) = @_;

    my $state = $up ? JSON::true : JSON::false;

    return mon_cmd($vmid, "set_link", name => $device, up => $state);
}
4364
# Create the netdev backend for $deviceid via the 'netdev_add' monitor
# command. Always returns 1; errors surface as exceptions from mon_cmd().
#
# The '-netdev' style string from print_netdev_full() is split on '=' and
# ',' into an option hash. 'vhost' is converted to a JSON boolean and
# 'queues' to a number before the options are sent.
sub qemu_netdevadd {
    my ($vmid, $conf, $arch, $device, $deviceid) = @_;

    my $netdev = print_netdev_full($vmid, $conf, $arch, $device, $deviceid, 1);
    my %options = split(/[=,]/, $netdev);

    $options{vhost} = JSON::boolean(PVE::JSONSchema::parse_boolean($options{vhost}))
        if defined($options{vhost});

    $options{queues} = $options{queues} + 0
        if defined($options{queues});

    mon_cmd($vmid, "netdev_add", %options);
    return 1;
}
4382
# Remove the netdev backend with id $deviceid via 'netdev_del'.
sub qemu_netdevdel {
    my ($vmid, $deviceid) = @_;

    return mon_cmd($vmid, "netdev_del", id => $deviceid);
}
4388
# Replace the USB device $deviceid of the running VM with the one described
# by $device. Does nothing when $device is false.
#
# The old device is unplugged first. For usb3 devices an 'xhci' controller
# is added if vm_devices_list() does not report one. Finally the new device
# is plugged via vm_deviceplug().
sub qemu_usb_hotplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    return if !$device;

    # remove the old one first
    vm_deviceunplug($vmid, $conf, $deviceid);

    # check if xhci controller is necessary and available
    if ($device->{usb3} && !vm_devices_list($vmid)->{xhci}) {
        my $pciaddr = print_pci_addr("xhci", undef, $arch, $machine_type);
        qemu_deviceadd($vmid, "nec-usb-xhci,id=xhci$pciaddr");
    }

    my $usb = parse_usb_device($device->{host});
    $usb->{usb3} = $device->{usb3};

    # add the new one
    return vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $usb, $arch, $machine_type);
}
4413
# Bring the number of vCPUs of the running VM to $vcpus.
#
# The maximum is sockets * cores from $conf; a false $vcpus means "all".
# Dies if more vCPUs than the maximum are requested, if unplugging is
# requested on a machine version below 2.7, or if the running VM does not
# match the configured count before plugging. Raises a parameter exception
# when a single plug/unplug does not show up in 'query-cpus-fast' in time.
#
# $conf->{vcpus} is updated and written after every successful single
# plug/unplug on machine versions >= 2.7.
sub qemu_cpu_hotplug {
    my ($vmid, $conf, $vcpus) = @_;

    my $machine_type = PVE::QemuServer::Machine::get_current_qemu_machine($vmid);

    # 'smp' is the old style socket count - no longer used, 'sockets' wins
    my $sockets = $conf->{sockets} || $conf->{smp} || 1;
    my $cores = $conf->{cores} || 1;
    my $maxcpus = $sockets * $cores;

    $vcpus = $maxcpus if !$vcpus;

    die "you can't add more vcpus than maxcpus\n"
        if $vcpus > $maxcpus;

    my $currentvcpus = $conf->{vcpus} || $maxcpus;

    if ($vcpus < $currentvcpus) {
        die "cpu hot-unplugging requires qemu version 2.7 or higher\n"
            if !PVE::QemuServer::Machine::machine_version($machine_type, 2, 7);

        for (my $cpu = $currentvcpus; $cpu > $vcpus; $cpu--) {
            qemu_devicedel($vmid, "cpu$cpu");

            # device_del is asynchronous - poll until the cpu is really gone
            my $attempts = 0;
            my $running_cpus;
            while (1) {
                $running_cpus = mon_cmd($vmid, "query-cpus-fast");
                last if scalar(@{$running_cpus}) == $cpu - 1;
                raise_param_exc({ vcpus => "error unplugging cpu$cpu" }) if $attempts > 5;
                $attempts++;
                sleep 1;
            }

            # update conf after each successful cpu unplug
            $conf->{vcpus} = scalar(@{$running_cpus});
            PVE::QemuConfig->write_config($vmid, $conf);
        }

        return;
    }

    my $running_before = mon_cmd($vmid, "query-cpus-fast");
    die "vcpus in running vm does not match its configuration\n"
        if scalar(@{$running_before}) != $currentvcpus;

    if (!PVE::QemuServer::Machine::machine_version($machine_type, 2, 7)) {
        # older machine versions use the 'cpu-add' command instead of device_add
        for (my $cpu = $currentvcpus; $cpu < $vcpus; $cpu++) {
            mon_cmd($vmid, "cpu-add", id => int($cpu));
        }
        return;
    }

    for (my $cpu = $currentvcpus + 1; $cpu <= $vcpus; $cpu++) {
        qemu_deviceadd($vmid, print_cpu_device($conf, $cpu));

        # poll until the new cpu shows up
        my $attempts = 0;
        my $running_cpus;
        while (1) {
            $running_cpus = mon_cmd($vmid, "query-cpus-fast");
            last if scalar(@{$running_cpus}) == $cpu;
            raise_param_exc({ vcpus => "error hotplugging cpu$cpu" }) if $attempts > 10;
            sleep 1;
            $attempts++;
        }

        # update conf after each successful cpu hotplug
        $conf->{vcpus} = scalar(@{$running_cpus});
        PVE::QemuConfig->write_config($vmid, $conf);
    }
}
4488
# Apply I/O throttling limits to block device $deviceid of a running VM.
#
# Positional arguments after ($vmid, $deviceid), in this order:
#   bps, bps_rd, bps_wr, iops, iops_rd, iops_wr,
#   bps_max, bps_rd_max, bps_wr_max, iops_max, iops_rd_max, iops_wr_max,
#   bps_max_length, bps_rd_max_length, bps_wr_max_length,
#   iops_max_length, iops_rd_max_length, iops_wr_max_length
#
# Every value is truncated with int() before it is sent with the
# 'block_set_io_throttle' monitor command. Returns without doing anything
# if the VM is not running.
sub qemu_block_set_io_throttle {
    my ($vmid, $deviceid, @values) = @_;

    return if !check_running($vmid);

    my @names = qw(
        bps bps_rd bps_wr
        iops iops_rd iops_wr
        bps_max bps_rd_max bps_wr_max
        iops_max iops_rd_max iops_wr_max
        bps_max_length bps_rd_max_length bps_wr_max_length
        iops_max_length iops_rd_max_length iops_wr_max_length
    );

    # pair each limit name with its positional value
    my @limits = map { ($names[$_], int($values[$_])) } 0 .. $#names;

    return mon_cmd($vmid, "block_set_io_throttle", device => $deviceid, @limits);
}
4520
# Resize volume $volid to $size and, if the VM is running, tell QEMU about
# it via 'block_resize'.
#
# When PVE::Storage::volume_resize() returns false, size 0 is sent to QEMU
# instead. The size passed to QEMU is rounded up to a multiple of 1024.
sub qemu_block_resize {
    my ($vmid, $deviceid, $storecfg, $volid, $size) = @_;

    my $running = check_running($vmid);

    my $resized = PVE::Storage::volume_resize($storecfg, $volid, $size, $running);
    $size = 0 if !$resized;

    return if !$running;

    # round up to the next multiple of 1024
    $size += (1024 - $size % 1024) % 1024;

    my %args = (
        device => $deviceid,
        size => int($size),
        timeout => 60,
    );
    return mon_cmd($vmid, "block_resize", %args);
}
4541
# Take snapshot $snap of volume $volid.
#
# For a running VM whose volume do_snapshots_with_qemu() accepts, QEMU takes
# an internal snapshot; in every other case the storage layer does it.
sub qemu_volume_snapshot {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $use_qemu = check_running($vmid) && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    if (!$use_qemu) {
        PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
    } else {
        mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap);
    }
}
4553
# Delete snapshot $snap of volume $volid.
#
# The volume only counts as "running" if the VM is running AND the volume is
# referenced by the current configuration. In that case, and if
# do_snapshots_with_qemu() accepts the volume, QEMU deletes its internal
# snapshot; otherwise the storage layer does it and is told whether the
# volume is in use.
sub qemu_volume_snapshot_delete {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $running = check_running($vmid);

    if ($running) {
        # re-evaluate: only volumes attached in the current config count
        $running = undef;

        my $conf = PVE::QemuConfig->load_config($vmid);
        my $mark_if_attached = sub {
            my ($ds, $drive) = @_;
            $running = 1 if $drive->{file} eq $volid;
        };
        PVE::QemuConfig->foreach_volume($conf, $mark_if_attached);
    }

    my $use_qemu = $running && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    if (!$use_qemu) {
        PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $running);
    } else {
        mon_cmd($vmid, 'blockdev-snapshot-delete-internal-sync', device => $deviceid, name => $snap);
    }
}
4575
# Set the migration capabilities of the running VM.
#
# Every capability reported by 'query-migrate-capabilities' is set
# explicitly: the ones in the table below get the listed state, all others
# are switched off. 'dirty-bitmaps' is enabled only if
# 'query-proxmox-support' reports the matching property (the savevm variant
# when $savevm is true); a failing support query is treated as unsupported.
sub set_migration_caps {
    my ($vmid, $savevm) = @_;

    my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };

    my $bitmap_prop = $savevm ? 'pbs-dirty-bitmap-savevm' : 'pbs-dirty-bitmap-migration';

    my %wanted_state = (
        "auto-converge" => 1,
        "xbzrle" => 1,
        "x-rdma-pin-all" => 0,
        "zero-blocks" => 0,
        "compress" => 0,
        "dirty-bitmaps" => $qemu_support->{$bitmap_prop} ? 1 : 0,
    );

    my $supported = mon_cmd($vmid, "query-migrate-capabilities");

    my @capabilities;
    for my $entry (@$supported) {
        my $name = $entry->{capability};
        push @capabilities, {
            capability => $name,
            state => $wanted_state{$name} ? JSON::true : JSON::false,
        };
    }

    return mon_cmd($vmid, "migrate-set-capabilities", capabilities => \@capabilities);
}
4606
# Call $func->($volid, $attributes, @param) once for every distinct volume
# referenced by $conf or any of its snapshots (including 'vmstate' and
# unused volumes).
#
# $attributes aggregates over all references of the volume:
#   cdrom                  - 1 only if every reference is a cdrom
#   replicate              - 1 if any reference has replication enabled
#                            (a missing 'replicate' option counts as enabled)
#   shared                 - 1 if any reference is marked shared
#   referenced_in_config   - 1 if referenced outside of snapshots
#   referenced_in_snapshot - { snapname => 1 } for each referencing snapshot
#   size                   - first true size seen
#   is_vmstate, is_tpmstate, is_unused - 1 if referenced under such a key
#   drivename              - a valid drive key referencing the volume
sub foreach_volid {
    my ($conf, $func, @param) = @_;

    my $volhash = {};

    my $collect = sub {
        my ($key, $drive, $snapname) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $info = ($volhash->{$volid} //= {});

        $info->{cdrom} //= 1;
        $info->{cdrom} = 0 if !drive_is_cdrom($drive);

        my $replicate = $drive->{replicate} // 1;
        $info->{replicate} //= 0;
        $info->{replicate} = 1 if $replicate;

        $info->{shared} //= 0;
        $info->{shared} = 1 if $drive->{shared};

        $info->{referenced_in_config} //= 0;
        if (defined($snapname)) {
            $info->{referenced_in_snapshot}->{$snapname} = 1;
        } else {
            $info->{referenced_in_config} = 1;
        }

        my $size = $drive->{size};
        $info->{size} //= $size if $size;

        $info->{is_vmstate} //= 0;
        $info->{is_vmstate} = 1 if $key eq 'vmstate';

        $info->{is_tpmstate} //= 0;
        $info->{is_tpmstate} = 1 if $key eq 'tpmstate0';

        $info->{is_unused} //= 0;
        $info->{is_unused} = 1 if $key =~ /^unused\d+$/;

        $info->{drivename} = $key if is_valid_drivename($key);
    };

    my $include_opts = {
        extra_keys => ['vmstate'],
        include_unused => 1,
    };

    # current config first, then every snapshot
    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $collect);
    foreach my $snapname (keys %{$conf->{snapshots}}) {
        my $snap = $conf->{snapshots}->{$snapname};
        PVE::QemuConfig->foreach_volume_full($snap, $include_opts, $collect, $snapname);
    }

    foreach my $volid (keys %$volhash) {
        $func->($volid, $volhash->{$volid}, @param);
    }
}
4664
# Options whose pending value is copied into the config without any action
# on the running VM (see vmconfig_hotplug_pending).
my $fast_plug_option = {
    map { $_ => 1 } qw(
        lock
        name
        onboot
        shares
        startup
        description
        protection
        vmstatestorage
        hookscript
        tags
    )
};
4677
# hotplug changes in [PENDING]
#
# Parameters:
#   $vmid, $conf, $storecfg - the running VM, its config and the storage config
#   $selection - optional hash used to only apply specified options, for
#                example: { cores => 1 } (only apply changed 'cores')
#   $errors    - hash ref used to return error messages, keyed by option
#
# Three passes over $conf->{pending}:
#   1. options listed in $fast_plug_option are committed unconditionally,
#   2. pending deletions are hot-unplugged / reset,
#   3. pending additions and changes are hotplugged.
# A branch signals "cannot be hotplugged, leave it pending" by dying with
# "skip\n"; any other error is recorded in $errors and the option also
# stays pending. The config is written at the end.
sub vmconfig_hotplug_pending {
    my ($vmid, $conf, $storecfg, $selection, $errors) = @_;

    my $defaults = load_defaults();
    my $arch = get_vm_arch($conf);
    my $machine_type = get_vm_machine($conf, undef, $arch);

    # commit values which do not have any impact on running VM first
    # Note: those options cannot raise errors, so we do not care about
    # $selection and always apply them.

    my $add_error = sub {
        my ($opt, $msg) = @_;
        $errors->{$opt} = "hotplug problem - $msg";
    };

    my $changes = 0;
    foreach my $opt (keys %{$conf->{pending}}) { # add/change
        if ($fast_plug_option->{$opt}) {
            $conf->{$opt} = $conf->{pending}->{$opt};
            delete $conf->{pending}->{$opt};
            $changes = 1;
        }
    }

    if ($changes) {
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');

    my $cgroup = PVE::QemuServer::CGroup->new($vmid);
    my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    foreach my $opt (sort keys %$pending_delete_hash) {
        next if $selection && !$selection->{$opt};
        my $force = $pending_delete_hash->{$opt}->{force};
        eval {
            if ($opt eq 'hotplug') {
                die "skip\n" if ($conf->{hotplug} =~ /memory/);
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                if ($defaults->{tablet}) {
                    # tablet/keyboard have no device description: pass an empty
                    # one so $arch and $machine_type end up in the right slots
                    vm_deviceplug($storecfg, $conf, $vmid, 'tablet', {}, $arch, $machine_type);
                    vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', {}, $arch, $machine_type)
                        if $arch eq 'aarch64';
                } else {
                    vm_deviceunplug($vmid, $conf, 'tablet');
                    vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
                }
            } elsif ($opt =~ m/^usb\d+/) {
                die "skip\n";
                # since we cannot reliably hot unplug usb devices we are disabling it
                #die "skip\n" if !$hotplug_features->{usb} || $conf->{$opt} =~ m/spice/i;
                #vm_deviceunplug($vmid, $conf, $opt);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, undef);
            } elsif ($opt eq 'balloon') {
                # enable balloon device is not hotpluggable
                die "skip\n" if defined($conf->{balloon}) && $conf->{balloon} == 0;
                # here we reset the ballooning value to memory
                my $balloon = $conf->{memory} || $defaults->{memory};
                mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
            } elsif ($fast_plug_option->{$opt}) {
                # do nothing
            } elsif ($opt =~ m/^net(\d+)$/) {
                die "skip\n" if !$hotplug_features->{network};
                vm_deviceunplug($vmid, $conf, $opt);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if !$hotplug_features->{disk} || $opt =~ m/(ide|sata)(\d+)/;
                vm_deviceunplug($vmid, $conf, $opt);
                vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
            } elsif ($opt =~ m/^memory$/) {
                die "skip\n" if !$hotplug_features->{memory};
                PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt);
            } elsif ($opt eq 'cpuunits') {
                $cgroup->change_cpu_shares(undef, 1024);
            } elsif ($opt eq 'cpulimit') {
                $cgroup->change_cpu_quota(-1, 100000);
            } else {
                die "skip\n";
            }
        };
        if (my $err = $@) {
            &$add_error($opt, $err) if $err ne "skip\n";
        } else {
            delete $conf->{$opt};
            PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        }
    }

    # Applies all pending cloud-init options at once and regenerates the
    # cloud-init config with the given drive ($key => $value) in place.
    my ($apply_pending_cloudinit, $apply_pending_cloudinit_done);
    $apply_pending_cloudinit = sub {
        return if $apply_pending_cloudinit_done; # once is enough
        $apply_pending_cloudinit_done = 1; # once is enough

        my ($key, $value) = @_;

        my @cloudinit_opts = keys %$confdesc_cloudinit;
        foreach my $opt (keys %{$conf->{pending}}) {
            next if !grep { $_ eq $opt } @cloudinit_opts;
            $conf->{$opt} = delete $conf->{pending}->{$opt};
        }

        my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
        foreach my $opt (sort keys %$pending_delete_hash) {
            next if !grep { $_ eq $opt } @cloudinit_opts;
            PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
            delete $conf->{$opt};
        }

        my $new_conf = { %$conf };
        $new_conf->{$key} = $value;
        PVE::QemuServer::Cloudinit::generate_cloudinitconfig($new_conf, $vmid);
    };

    foreach my $opt (keys %{$conf->{pending}}) {
        next if $selection && !$selection->{$opt};
        my $value = $conf->{pending}->{$opt};
        eval {
            if ($opt eq 'hotplug') {
                die "skip\n" if ($value =~ /memory/) || ($value !~ /memory/ && $conf->{hotplug} =~ /memory/);
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                if ($value == 1) {
                    # see above: an empty device description keeps the
                    # remaining arguments in their proper positions
                    vm_deviceplug($storecfg, $conf, $vmid, 'tablet', {}, $arch, $machine_type);
                    vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', {}, $arch, $machine_type)
                        if $arch eq 'aarch64';
                } elsif ($value == 0) {
                    vm_deviceunplug($vmid, $conf, 'tablet');
                    vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
                }
            } elsif ($opt =~ m/^usb\d+$/) {
                die "skip\n";
                # since we cannot reliably hot unplug usb devices we disable it for now
                #die "skip\n" if !$hotplug_features->{usb} || $value =~ m/spice/i;
                #my $d = eval { parse_property_string($usbdesc->{format}, $value) };
                #die "skip\n" if !$d;
                #qemu_usb_hotplug($storecfg, $conf, $vmid, $opt, $d, $arch, $machine_type);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, $value);
            } elsif ($opt eq 'balloon') {
                # enable/disable ballooning device is not hotpluggable
                my $old_balloon_enabled = !!(!defined($conf->{balloon}) || $conf->{balloon});
                my $new_balloon_enabled = !!(!defined($conf->{pending}->{balloon}) || $conf->{pending}->{balloon});
                die "skip\n" if $old_balloon_enabled != $new_balloon_enabled;

                # allow manual ballooning if shares is set to zero
                if ((defined($conf->{shares}) && ($conf->{shares} == 0))) {
                    my $balloon = $conf->{pending}->{balloon} || $conf->{memory} || $defaults->{memory};
                    mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
                }
            } elsif ($opt =~ m/^net(\d+)$/) {
                # some changes can be done without hotplug
                vmconfig_update_net($storecfg, $conf, $hotplug_features->{network},
                                    $vmid, $opt, $value, $arch, $machine_type);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if $opt eq 'efidisk0' || $opt eq 'tpmstate0';
                # some changes can be done without hotplug
                my $drive = parse_drive($opt, $value);
                if (drive_is_cloudinit($drive)) {
                    &$apply_pending_cloudinit($opt, $value);
                }
                vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
                                     $vmid, $opt, $value, $arch, $machine_type);
            } elsif ($opt =~ m/^memory$/) { #dimms
                die "skip\n" if !$hotplug_features->{memory};
                $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt, $value);
            } elsif ($opt eq 'cpuunits') {
                $cgroup->change_cpu_shares($conf->{pending}->{$opt}, 1024);
            } elsif ($opt eq 'cpulimit') {
                my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000);
                $cgroup->change_cpu_quota($cpulimit, 100000);
            } else {
                die "skip\n"; # skip non-hot-pluggable options
            }
        };
        if (my $err = $@) {
            &$add_error($opt, $err) if $err ne "skip\n";
        } else {
            $conf->{$opt} = $value;
            delete $conf->{pending}->{$opt};
        }
    }

    PVE::QemuConfig->write_config($vmid, $conf);
}
4870
# Free the volume behind drive $key if that is allowed.
#
# Only acts when $force is set or $key is an 'unused' entry, and the drive
# is not a cdrom. Returns 1 when the entry may be dropped from the config:
# either the volume was freed, or the VM does not own the volume. Returns
# an empty value when nothing was done.
#
# Dies if the user lacks 'Datastore.AllocateSpace' on the storage or the
# volume is still in use.
sub try_deallocate_drive {
    my ($storecfg, $vmid, $conf, $key, $drive, $rpcenv, $authuser, $force) = @_;

    return if !$force && $key !~ /^unused/;
    return if drive_is_cdrom($drive, 1);

    my $volid = $drive->{file};

    # If vm is not owner of this disk remove from config
    return 1 if !vm_is_volid_owner($storecfg, $vmid, $volid);

    my $sid = PVE::Storage::parse_volume_id($volid);
    $rpcenv->check($authuser, "/storage/$sid", ['Datastore.AllocateSpace']);

    # check if the disk is really unused
    die "unable to delete '$volid' - volume is still in use (snapshot?)\n"
        if PVE::QemuServer::Drive::is_volume_in_use($storecfg, $conf, $key, $volid);

    PVE::Storage::vdisk_free($storecfg, $volid);
    return 1;
}
4893
# Handle the removal of drive $opt from the config: with $force the volume
# is deallocated (after a 'VM.Config.Disk' permission check), without it the
# volume is registered as unused.
sub vmconfig_delete_or_detach_drive {
    my ($vmid, $storecfg, $conf, $opt, $force) = @_;

    my $drive = parse_drive($opt, $conf->{$opt});

    my $rpcenv = PVE::RPCEnvironment::get();
    my $authuser = $rpcenv->get_user();

    if (!$force) {
        vmconfig_register_unused_drive($storecfg, $vmid, $conf, $drive);
    } else {
        $rpcenv->check_vm_perm($authuser, $vmid, undef, ['VM.Config.Disk']);
        try_deallocate_drive($storecfg, $vmid, $conf, $opt, $drive, $rpcenv, $authuser, $force);
    }
}
4909
4910
4911
# Apply all pending changes to the configuration (cold plug).
#
# Pending deletions are processed first, then pending additions/changes.
# A drive that gets deleted or replaced is handed to
# vmconfig_delete_or_detach_drive() / vmconfig_register_unused_drive()
# before its config entry changes. Failures are stored in $errors (keyed by
# option) and warned about; the affected option stays pending.
sub vmconfig_apply_pending {
    my ($vmid, $conf, $storecfg, $errors) = @_;

    my $report_failure = sub {
        my ($opt, $msg) = @_;
        my $text = "unable to apply pending change $opt : $msg";
        $errors->{$opt} = $text;
        warn $text;
    };

    # cold plug

    my $to_delete = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    foreach my $opt (sort keys %$to_delete) {
        my $force = $to_delete->{$opt}->{force};
        eval {
            die "internal error" if $opt =~ m/^unused/;

            vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force)
                if defined($conf->{$opt}) && is_valid_drivename($opt);
        };
        if (my $err = $@) {
            $report_failure->($opt, $err);
            next;
        }

        PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        delete $conf->{$opt};
    }

    PVE::QemuConfig->cleanup_pending($conf);

    foreach my $opt (keys %{$conf->{pending}}) { # add/change
        next if $opt eq 'delete'; # just to be sure
        eval {
            # the drive being replaced must not get lost
            vmconfig_register_unused_drive($storecfg, $vmid, $conf, parse_drive($opt, $conf->{$opt}))
                if defined($conf->{$opt}) && is_valid_drivename($opt);
        };
        if (my $err = $@) {
            $report_failure->($opt, $err);
            next;
        }

        $conf->{$opt} = delete $conf->{pending}->{$opt};
    }

    # write all changes at once to avoid unnecessary i/o
    PVE::QemuConfig->write_config($vmid, $conf);
}
4961
# Apply the new value $value of network option $opt to the running VM.
#
# If the NIC already exists and only bridge, tag, trunks, firewall, rate or
# link state changed, the change is applied online to the tap interface /
# link and 1 is returned. A change of model, MAC address or queues, or a
# switch between bridged and non-bridged mode, requires the NIC to be
# unplugged and plugged again; this (and plugging a new NIC) needs $hotplug,
# otherwise the sub dies with "skip\n".
sub vmconfig_update_net {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $newnet = parse_net($value);

    if ($conf->{$opt}) {
        my $oldnet = parse_net($conf->{$opt});

        my $needs_replug =
            safe_string_ne($oldnet->{model}, $newnet->{model})
            || safe_string_ne($oldnet->{macaddr}, $newnet->{macaddr})
            || safe_num_ne($oldnet->{queues}, $newnet->{queues})
            || !($newnet->{bridge} && $oldnet->{bridge}); # bridge/nat mode change

        if (!$needs_replug) {
            die "internal error" if $opt !~ m/net(\d+)/;
            my $iface = "tap${vmid}i$1";

            my $topology_changed =
                safe_string_ne($oldnet->{bridge}, $newnet->{bridge})
                || safe_num_ne($oldnet->{tag}, $newnet->{tag})
                || safe_string_ne($oldnet->{trunks}, $newnet->{trunks})
                || safe_num_ne($oldnet->{firewall}, $newnet->{firewall});

            if ($topology_changed) {
                PVE::Network::tap_unplug($iface);

                my @plug_args = (
                    $iface,
                    $newnet->{bridge},
                    $newnet->{tag},
                    $newnet->{firewall},
                    $newnet->{trunks},
                    $newnet->{rate},
                );
                if ($have_sdn) {
                    PVE::Network::SDN::Zones::tap_plug(@plug_args);
                } else {
                    PVE::Network::tap_plug(@plug_args);
                }
            } elsif (safe_num_ne($oldnet->{rate}, $newnet->{rate})) {
                # Rate can be applied on its own but any change above needs to
                # include the rate in tap_plug since OVS resets everything.
                PVE::Network::tap_rate_limit($iface, $newnet->{rate});
            }

            if (safe_string_ne($oldnet->{link_down}, $newnet->{link_down})) {
                qemu_set_link_status($vmid, $opt, !$newnet->{link_down});
            }

            return 1;
        }

        # for non online change, we try to hot-unplug
        die "skip\n" if !$hotplug;
        vm_deviceunplug($vmid, $conf, $opt);
    }

    die "skip\n" if !$hotplug;

    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet, $arch, $machine_type);
}
5014
# Apply a changed or newly added drive option ($opt => $value).
#
# For an existing drive the media type must stay the same. CD-ROM drives get
# their medium ejected/changed, disks pointing to the same volume only get
# their I/O throttle limits updated; both cases return 1. A disk pointing to
# a different volume is unplugged, registered as unused and the new one is
# plugged afterwards. Dies with "skip\n" whenever the change cannot be applied
# to the running VM.
sub vmconfig_update_disk {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $drive = parse_drive($opt, $value);

    if ($conf->{$opt} && (my $old_drive = parse_drive($opt, $conf->{$opt}))) {
        my $new_media = $drive->{media} || 'disk';
        my $old_media = $old_drive->{media} || 'disk';
        die "unable to change media type\n" if $new_media ne $old_media;

        if (drive_is_cdrom($old_drive)) {
            if ($drive->{file} eq 'none') {
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive)
                    if drive_is_cloudinit($old_drive);
            } else {
                my $iso_path = get_iso_path($storecfg, $vmid, $drive->{file});

                # force eject if locked
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");

                mon_cmd($vmid, "blockdev-change-medium", id => "$opt", filename => "$iso_path")
                    if $iso_path;
            }

            return 1;
        }

        if ($drive->{file} eq $old_drive->{file}) {
            # same volume, update the existing disk

            # skip non hotpluggable value
            for my $key (qw(discard iothread queues cache ssd)) {
                die "skip\n" if safe_string_ne($drive->{$key}, $old_drive->{$key});
            }

            # apply throttle if any of the limits differ
            my @limit_keys = qw(
                mbps mbps_rd mbps_wr
                iops iops_rd iops_wr
                mbps_max mbps_rd_max mbps_wr_max
                iops_max iops_rd_max iops_wr_max
                bps_max_length bps_rd_max_length bps_wr_max_length
                iops_max_length iops_rd_max_length iops_wr_max_length
            );

            my $limits_changed = 0;
            for my $key (@limit_keys) {
                next if !safe_num_ne($drive->{$key}, $old_drive->{$key});
                $limits_changed = 1;
                last;
            }

            if ($limits_changed) {
                # megabytes are passed on as bytes, unset limits as 0 and
                # unset burst lengths as 1
                my $bytes = sub { return ($drive->{$_[0]} || 0) * 1024 * 1024 };
                my $limit = sub { return $drive->{$_[0]} || 0 };
                my $length = sub { return $drive->{$_[0]} || 1 };

                qemu_block_set_io_throttle(
                    $vmid, "drive-$opt",
                    $bytes->('mbps'),
                    $bytes->('mbps_rd'),
                    $bytes->('mbps_wr'),
                    $limit->('iops'),
                    $limit->('iops_rd'),
                    $limit->('iops_wr'),
                    $bytes->('mbps_max'),
                    $bytes->('mbps_rd_max'),
                    $bytes->('mbps_wr_max'),
                    $limit->('iops_max'),
                    $limit->('iops_rd_max'),
                    $limit->('iops_wr_max'),
                    $length->('bps_max_length'),
                    $length->('bps_rd_max_length'),
                    $length->('bps_wr_max_length'),
                    $length->('iops_max_length'),
                    $length->('iops_rd_max_length'),
                    $length->('iops_wr_max_length'),
                );
            }

            return 1;
        }

        # different volume: unplug and register as unused
        die "skip\n" if !$hotplug;

        vm_deviceunplug($vmid, $conf, $opt);
        vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
    }

    die "skip\n" if !$hotplug || $opt =~ m/(ide|sata)(\d+)/;
    # hotplug new disks
    PVE::Storage::activate_volumes($storecfg, [$drive->{file}]) if $drive->{file} !~ m|^/dev/.+|;
    vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive, $arch, $machine_type);
}
5122
# called in locked context by incoming migration
#
# Collects all volumes of $conf that are neither CD-ROMs nor located on a
# shared storage. Returns a hash reference mapping the drive key to
# [$volid, $storeid, $volname, $drive, $use_existing].
sub vm_migrate_get_nbd_disks {
    my ($storecfg, $conf, $replicated_volumes) = @_;

    my %local_volumes;

    my $collect = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid);

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        return if $scfg->{shared};

        # replicated disks re-use existing state via bitmap
        my $use_existing = $replicated_volumes->{$volid} ? 1 : 0;

        $local_volumes{$ds} = [$volid, $storeid, $volname, $drive, $use_existing];
    };

    PVE::QemuConfig->foreach_volume($conf, $collect);

    return \%local_volumes;
}
5148
# called in locked context by incoming migration
#
# Prepares the target volumes for an NBD storage migration.
#
# $source_volumes maps drive keys to
# [$volid, $storeid, $volname, $drive, $use_existing], as returned by
# vm_migrate_get_nbd_disks. Entries flagged with $use_existing are passed on
# unchanged and marked as replicated; for all others a new volume is allocated
# on the (possibly mapped) target storage.
#
# Returns a hash reference mapping each drive key to a hash with 'drivestr',
# 'volid' and, for re-used volumes, 'replicated'.
#
# The drive hashes in $source_volumes are not modified.
sub vm_migrate_alloc_nbd_disks {
    my ($storecfg, $vmid, $source_volumes, $storagemap) = @_;

    my $nbd = {};
    foreach my $opt (sort keys %$source_volumes) {
        my ($volid, $storeid, $volname, $drive, $use_existing) = @{$source_volumes->{$opt}};

        if ($use_existing) {
            $nbd->{$opt}->{drivestr} = print_drive($drive);
            $nbd->{$opt}->{volid} = $volid;
            $nbd->{$opt}->{replicated} = 1;
            next;
        }

        # determined per volume, so nothing leaks over from a previous iteration
        my $format;

        # If a remote storage is specified and the format of the original
        # volume is not available there, fall back to the default format.
        # Otherwise use the same format as the original.
        if (!$storagemap->{identity}) {
            $storeid = map_storage($storagemap, $storeid);
            my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            my $fileFormat = qemu_img_format($scfg, $volname);
            $format = (grep { $fileFormat eq $_ } @{$validFormats}) ? $fileFormat : $defFormat;
        } else {
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $format = qemu_img_format($scfg, $volname);
        }

        my $size = $drive->{size} / 1024;
        my $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $format, undef, $size);

        # work on a copy, the caller's drive hash still describes the source volume
        my $newdrive = { %$drive, format => $format, file => $newvolid };

        $nbd->{$opt}->{drivestr} = print_drive($newdrive);
        $nbd->{$opt}->{volid} = $newvolid;
    }

    return $nbd;
}
5192
# Locked entry point for starting a VM.
#
# see vm_start_nolock for parameters, additionally:
# migrate_opts:
#   storagemap = parsed storage map for allocating NBD disks
sub vm_start {
    my ($storecfg, $vmid, $params, $migrate_opts) = @_;

    my $start_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid, $migrate_opts->{migratedfrom});

        if (!$params->{skiptemplate} && PVE::QemuConfig->is_template($conf)) {
            die "you can't start a vm if it's a template\n";
        }

        my $is_suspended = PVE::QemuConfig->has_lock($conf, 'suspended');
        my $is_backing_up = PVE::QemuConfig->has_lock($conf, 'backup');

        my $running = check_running($vmid, undef, $migrate_opts->{migratedfrom});

        # a backup is currently running, attempt to start the guest in the
        # existing QEMU instance
        return vm_resume($vmid) if $is_backing_up && $running;

        if (!$params->{skiplock} && !$is_suspended) {
            PVE::QemuConfig->check_lock($conf);
        }

        $params->{resume} = $is_suspended || defined($conf->{vmstate});

        die "VM $vmid already running\n" if $running;

        if (my $storagemap = $migrate_opts->{storagemap}) {
            my $replicated = $migrate_opts->{replicated_volumes};
            my $disks = vm_migrate_get_nbd_disks($storecfg, $conf, $replicated);
            my $nbd = vm_migrate_alloc_nbd_disks($storecfg, $vmid, $disks, $storagemap);
            $migrate_opts->{nbd} = $nbd;

            # start with the freshly allocated target volumes
            $conf->{$_} = $nbd->{$_}->{drivestr} for keys %$nbd;
        }

        return vm_start_nolock($storecfg, $vmid, $conf, $params, $migrate_opts);
    };

    return PVE::QemuConfig->lock_config($vmid, $start_locked);
}
5236
5237
# Starts the QEMU process for $vmid using the already loaded $conf. Does not
# take the config lock itself; vm_start calls this from within lock_config.
#
# params:
#   statefile => 'tcp', 'unix' for migration or path/volid for RAM state
#   skiplock => 0/1, skip checking for config lock
#   skiptemplate => 0/1, skip checking whether VM is template
#   forcemachine => to force Qemu machine (rollback/migration)
#   forcecpu => a QEMU '-cpu' argument string to override get_cpu_options
#   timeout => in seconds
#   paused => start VM in paused state (backup)
#   resume => resume from hibernation
#   pbs-backing => {
#       sata0 => {
#           repository
#           snapshot
#           keyfile
#           archive
#       },
#       virtio2 => ...
#   }
# migrate_opts:
#   nbd => volumes for NBD exports (vm_migrate_alloc_nbd_disks)
#   migratedfrom => source node
#   spice_ticket => used for spice migration, passed via tunnel/stdin
#   network => CIDR of migration network
#   type => secure/insecure - tunnel over encrypted connection or plain-text
#   nbd_proto_version => int, 0 for TCP, 1 for UNIX
#   replicated_volumes = which volids should be re-used with bitmaps for nbd migration
#
# Returns a hash reference with 'migrate_uri' (may be undef) and, depending on
# the options, 'migrate_storage_uri', 'drives' and 'spice_port'.
# Dies with "start failed: ..." if the QEMU process could not be started.
sub vm_start_nolock {
    my ($storecfg, $vmid, $conf, $params, $migrate_opts) = @_;

    my $statefile = $params->{statefile};
    my $resume = $params->{resume};

    my $migratedfrom = $migrate_opts->{migratedfrom};
    my $migration_type = $migrate_opts->{type};

    my $res = {};

    # clean up leftover reboot request files
    eval { clear_reboot_request($vmid); };
    warn $@ if $@;

    if (!$statefile && scalar(keys %{$conf->{pending}})) {
        vmconfig_apply_pending($vmid, $conf, $storecfg);
        $conf = PVE::QemuConfig->load_config($vmid); # update/reload
    }

    # don't regenerate the ISO if the VM is started as part of a live migration
    # this way we can reuse the old ISO with the correct config
    PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid) if !$migratedfrom;

    my $defaults = load_defaults();

    # set environment variable useful inside network script
    $ENV{PVE_MIGRATED_FROM} = $migratedfrom if $migratedfrom;

    PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1);

    my $forcemachine = $params->{forcemachine};
    my $forcecpu = $params->{forcecpu};
    if ($resume) {
        # enforce machine and CPU type on suspended vm to ensure HW compatibility
        $forcemachine = $conf->{runningmachine};
        $forcecpu = $conf->{runningcpu};
        print "Resuming suspended VM\n";
    }

    my ($cmd, $vollist, $spice_port) = config_to_command($storecfg, $vmid,
        $conf, $defaults, $forcemachine, $forcecpu, $params->{'pbs-backing'});

    # Lazily determines (and caches) the local IP to listen on for migration
    # traffic: the single local address inside the migration network if one is
    # configured, else the IP of $nodename.
    my $migration_ip;
    my $get_migration_ip = sub {
        my ($nodename) = @_;

        return $migration_ip if defined($migration_ip);

        my $cidr = $migrate_opts->{network};

        if (!defined($cidr)) {
            my $dc_conf = PVE::Cluster::cfs_read_file('datacenter.cfg');
            $cidr = $dc_conf->{migration}->{network};
        }

        if (defined($cidr)) {
            my $ips = PVE::Network::get_local_ip_from_cidr($cidr);

            die "could not get IP: no address configured on local " .
                "node for network '$cidr'\n" if scalar(@$ips) == 0;

            die "could not get IP: multiple addresses configured on local " .
                "node for network '$cidr'\n" if scalar(@$ips) > 1;

            $migration_ip = $ips->[0];
        }

        $migration_ip = PVE::Cluster::remote_node_ip($nodename, 1)
            if !defined($migration_ip);

        return $migration_ip;
    };

    my $migrate_uri;
    if ($statefile) {
        if ($statefile eq 'tcp') {
            my $localip = "localhost";
            my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg');
            my $nodename = nodename();

            if (!defined($migration_type)) {
                if (defined($datacenterconf->{migration}->{type})) {
                    $migration_type = $datacenterconf->{migration}->{type};
                } else {
                    $migration_type = 'secure';
                }
            }

            if ($migration_type eq 'insecure') {
                $localip = $get_migration_ip->($nodename);
                $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
            }

            my $pfamily = PVE::Tools::get_host_address_family($nodename);
            my $migrate_port = PVE::Tools::next_migrate_port($pfamily);
            $migrate_uri = "tcp:${localip}:${migrate_port}";
            push @$cmd, '-incoming', $migrate_uri;
            push @$cmd, '-S';

        } elsif ($statefile eq 'unix') {
            # should be default for secure migrations as a ssh TCP forward
            # tunnel is not deterministically ready and regularly fails
            # to set up in time, so use UNIX socket forwards
            my $socket_addr = "/run/qemu-server/$vmid.migrate";
            unlink $socket_addr;

            $migrate_uri = "unix:$socket_addr";

            push @$cmd, '-incoming', $migrate_uri;
            push @$cmd, '-S';

        } elsif (-e $statefile) {
            push @$cmd, '-loadstate', $statefile;
        } else {
            my $statepath = PVE::Storage::path($storecfg, $statefile);
            push @$vollist, $statefile;
            push @$cmd, '-loadstate', $statepath;
        }
    } elsif ($params->{paused}) {
        push @$cmd, '-S';
    }

    my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume);

    my $pci_devices = {}; # host pci devices
    for (my $i = 0; $i < $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES; $i++) {
        my $dev = $conf->{"hostpci$i"} or next;
        $pci_devices->{$i} = parse_hostpci($dev);
    }

    my $pci_id_list = [ map { $_->{id} } map { $_->{pciid}->@* } values $pci_devices->%* ];
    # reserve all PCI IDs before actually doing anything with them
    PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, $start_timeout);

    eval {
        for my $id (sort keys %$pci_devices) {
            my $d = $pci_devices->{$id};
            for my $dev ($d->{pciid}->@*) {
                PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $id, $d->{mdev});
            }
        }
    };
    if (my $err = $@) {
        eval { PVE::QemuServer::PCI::remove_pci_reservation($pci_id_list) };
        warn $@ if $@;
        die $err;
    }

    PVE::Storage::activate_volumes($storecfg, $vollist);

    # best effort: errors from stopping a possibly left-over scope are ignored
    eval {
        run_command(['/bin/systemctl', 'stop', "$vmid.scope"], outfunc => sub{}, errfunc => sub{});
    };
    # Issues with the above 'stop' not being fully completed are extremely rare, a very low
    # timeout should be more than enough here...
    PVE::Systemd::wait_for_unit_removed("$vmid.scope", 5);

    my $cpuunits = get_cpuunits($conf);

    my %run_params = (
        timeout => $statefile ? undef : $start_timeout,
        umask => 0077,
        noerr => 1,
    );

    # when migrating, prefix QEMU output so other side can pick up any
    # errors that might occur and show the user
    if ($migratedfrom) {
        $run_params{quiet} = 1;
        $run_params{logfunc} = sub { print "QEMU: $_[0]\n" };
    }

    my %systemd_properties = (
        Slice => 'qemu.slice',
        KillMode => 'process',
        SendSIGKILL => 0,
        TimeoutStopUSec => ULONG_MAX, # infinity
    );

    if (PVE::CGroup::cgroup_mode() == 2) {
        $cpuunits = 10000 if $cpuunits >= 10000; # else we get an error
        $systemd_properties{CPUWeight} = $cpuunits;
    } else {
        $systemd_properties{CPUShares} = $cpuunits;
    }

    if (my $cpulimit = $conf->{cpulimit}) {
        $systemd_properties{CPUQuota} = int($cpulimit * 100);
    }
    $systemd_properties{timeout} = 10 if $statefile; # setting up the scope should be quick

    my $run_qemu = sub {
        PVE::Tools::run_fork(sub {
            PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);

            my $tpmpid;
            if (my $tpm = $conf->{tpmstate0}) {
                # start the TPM emulator so QEMU can connect on start
                $tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom);
            }

            my $exitcode = run_command($cmd, %run_params);
            if ($exitcode) {
                # only mention and signal swtpm if an instance was started
                if ($tpmpid) {
                    warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n";
                    kill 'TERM', $tpmpid;
                }
                die "QEMU exited with code $exitcode\n";
            }
        });
    };

    if ($conf->{hugepages}) {

        my $code = sub {
            my $hugepages_topology = PVE::QemuServer::Memory::hugepages_topology($conf);
            my $hugepages_host_topology = PVE::QemuServer::Memory::hugepages_host_topology();

            PVE::QemuServer::Memory::hugepages_mount();
            PVE::QemuServer::Memory::hugepages_allocate($hugepages_topology, $hugepages_host_topology);

            eval { $run_qemu->() };
            if (my $err = $@) {
                PVE::QemuServer::Memory::hugepages_reset($hugepages_host_topology)
                    if !$conf->{keephugepages};
                die $err;
            }

            PVE::QemuServer::Memory::hugepages_pre_deallocate($hugepages_topology)
                if !$conf->{keephugepages};
        };
        eval { PVE::QemuServer::Memory::hugepages_update_locked($code); };

    } else {
        eval { $run_qemu->() };
    }

    if (my $err = $@) {
        # deactivate volumes if start fails; cleanup problems are only
        # reported, the original start error is what gets raised
        eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
        warn $@ if $@;
        eval { PVE::QemuServer::PCI::remove_pci_reservation($pci_id_list) };
        warn $@ if $@;

        die "start failed: $err";
    }

    # re-reserve all PCI IDs now that we can know the actual VM PID
    my $pid = PVE::QemuServer::Helpers::vm_running_locally($vmid);
    eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, undef, $pid) };
    warn $@ if $@;

    print "migration listens on $migrate_uri\n" if $migrate_uri;
    $res->{migrate_uri} = $migrate_uri;

    if ($statefile && $statefile ne 'tcp' && $statefile ne 'unix') {
        eval { mon_cmd($vmid, "cont"); };
        warn $@ if $@;
    }

    # start nbd server for storage migration
    if (my $nbd = $migrate_opts->{nbd}) {
        my $nbd_protocol_version = $migrate_opts->{nbd_proto_version} // 0;

        my $migrate_storage_uri;
        # nbd_protocol_version > 0 for unix socket support
        # (the migration type may be unset here, which selects the TCP variant)
        if ($nbd_protocol_version > 0 && ($migration_type // '') eq 'secure') {
            my $socket_path = "/run/qemu-server/$vmid\_nbd.migrate";
            mon_cmd($vmid, "nbd-server-start", addr => { type => 'unix', data => { path => $socket_path } } );
            $migrate_storage_uri = "nbd:unix:$socket_path";
        } else {
            my $nodename = nodename();
            my $localip = $get_migration_ip->($nodename);
            my $pfamily = PVE::Tools::get_host_address_family($nodename);
            my $storage_migrate_port = PVE::Tools::next_migrate_port($pfamily);

            mon_cmd($vmid, "nbd-server-start", addr => {
                type => 'inet',
                data => {
                    host => "${localip}",
                    port => "${storage_migrate_port}",
                },
            });
            $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
            $migrate_storage_uri = "nbd:${localip}:${storage_migrate_port}";
        }

        $res->{migrate_storage_uri} = $migrate_storage_uri;

        foreach my $opt (sort keys %$nbd) {
            my $drivestr = $nbd->{$opt}->{drivestr};
            my $volid = $nbd->{$opt}->{volid};
            mon_cmd($vmid, "nbd-server-add", device => "drive-$opt", writable => JSON::true );
            my $nbd_uri = "$migrate_storage_uri:exportname=drive-$opt";
            print "storage migration listens on $nbd_uri volume:$drivestr\n";
            print "re-using replicated volume: $opt - $volid\n"
                if $nbd->{$opt}->{replicated};

            $res->{drives}->{$opt} = $nbd->{$opt};
            $res->{drives}->{$opt}->{nbd_uri} = $nbd_uri;
        }
    }

    if ($migratedfrom) {
        eval {
            set_migration_caps($vmid);
        };
        warn $@ if $@;

        if ($spice_port) {
            print "spice listens on port $spice_port\n";
            $res->{spice_port} = $spice_port;
            if ($migrate_opts->{spice_ticket}) {
                mon_cmd($vmid, "set_password", protocol => 'spice', password =>
                    $migrate_opts->{spice_ticket});
                mon_cmd($vmid, "expire_password", protocol => 'spice', time => "+30");
            }
        }

    } else {
        mon_cmd($vmid, "balloon", value => $conf->{balloon}*1024*1024)
            if !$statefile && $conf->{balloon};

        foreach my $opt (keys %$conf) {
            next if $opt !~ m/^net\d+$/;
            my $nicconf = parse_net($conf->{$opt});
            qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down};
        }
    }

    mon_cmd($vmid, 'qom-set',
        path => "machine/peripheral/balloon0",
        property => "guest-stats-polling-interval",
        value => 2) if (!defined($conf->{balloon}) || $conf->{balloon});

    if ($resume) {
        print "Resumed VM, removing state\n";
        if (my $vmstate = $conf->{vmstate}) {
            PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
            PVE::Storage::vdisk_free($storecfg, $vmstate);
        }
        delete $conf->@{qw(lock vmstate runningmachine runningcpu)};
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-start');

    return $res;
}
5610
# Returns the QEMU command line for $vmid as a single string. With $snapname
# the configuration stored in that snapshot is used, including its recorded
# machine and CPU overrides.
sub vm_commandline {
    my ($storecfg, $vmid, $snapname) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);
    my ($forcemachine, $forcecpu);

    if ($snapname) {
        my $snapshot = $conf->{snapshots}->{$snapname};
        defined($snapshot) or die "snapshot '$snapname' does not exist\n";

        # check for machine or CPU overrides in snapshot
        ($forcemachine, $forcecpu) = $snapshot->@{qw(runningmachine runningcpu)};

        # keep file digest for API
        $snapshot->{digest} = $conf->{digest};

        $conf = $snapshot;
    }

    my $defaults = load_defaults();

    # assigned to a scalar on purpose, exactly like the previous implementation
    my $cmd = config_to_command(
        $storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu);

    return PVE::Tools::cmd2string($cmd);
}
5638
# Sends 'system_reset' to the VM while holding the config lock. The config
# lock check is skipped when $skiplock is set.
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $reset_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid);

        if (!$skiplock) {
            PVE::QemuConfig->check_lock($conf);
        }

        mon_cmd($vmid, "system_reset");
    };

    PVE::QemuConfig->lock_config($vmid, $reset_locked);
}
5651
# Returns an array reference with the volume IDs referenced by $conf.
# Absolute paths and values that cannot be parsed as a volume ID are left out.
sub get_vm_volumes {
    my ($conf) = @_;

    my @volids;

    my $collect = sub {
        my ($volid, $attr) = @_;

        # absolute paths are not storage volumes
        return if $volid =~ m|^/|;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        $storeid or return;

        push @volids, $volid;
    };

    foreach_volid($conf, $collect);

    return \@volids;
}
5669
# Cleans up after a stopped VM: deactivates volumes and unmaps the TPM state
# volume (unless $keepActive), removes the runtime files and the ivshmem
# device, cleans up mediated devices and PCI reservations and optionally
# applies pending config changes. Errors are only warned about.
sub vm_stop_cleanup {
    my ($storecfg, $vmid, $conf, $keepActive, $apply_pending_changes) = @_;

    eval {
        if (!$keepActive) {
            PVE::Storage::deactivate_volumes($storecfg, get_vm_volumes($conf));

            if (my $tpmdrive = $conf->{tpmstate0}) {
                my $tpm = parse_drive("tpmstate0", $tpmdrive);
                my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
                PVE::Storage::unmap_volume($storecfg, $tpm->{file}) if $storeid;
            }
        }

        unlink "/var/run/qemu-server/${vmid}.$_" for qw(mon qmp pid vnc qga);

        if ($conf->{ivshmem}) {
            my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
            # just delete it for now, VMs which have this already open are
            # not affected, but new VMs will get a separate one. If this
            # becomes an issue we either add some sort of ref-counting or just
            # add a "don't delete on stop" flag to the ivshmem format.
            unlink '/dev/shm/pve-shm-' . ($ivshmem->{name} // $vmid);
        }

        my @reserved_ids;
        for my $key (keys %$conf) {
            my ($index) = $key =~ m/^hostpci(\d+)$/ or next;

            my $device = parse_hostpci($conf->{$key});
            my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $index);

            for my $pci (@{$device->{pciid}}) {
                push @reserved_ids, $pci->{id};
                PVE::SysFSTools::pci_cleanup_mdev_device($pci->{id}, $uuid);
            }
        }
        PVE::QemuServer::PCI::remove_pci_reservation(\@reserved_ids);

        vmconfig_apply_pending($vmid, $conf, $storecfg) if $apply_pending_changes;
    };
    warn $@ if $@; # avoid errors - just warn
}
5720
# call only in locked context
#
# Stops or shuts down a running VM and waits for the process to exit. With
# $force, a VM that does not stop in time is terminated with SIGTERM and, if
# still running ten seconds later, with SIGKILL; without $force the sub dies
# instead. Returns early without doing anything if the VM is not running.
sub _do_vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive) = @_;

    my $pid = check_running($vmid, $nocheck);
    return if !$pid;

    my $conf;
    if (!$nocheck) {
        $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->check_lock($conf) if !$skiplock;
        if (!defined($timeout) && $shutdown && $conf->{startup}) {
            # fall back to the shutdown timeout from the startup order setting
            my $order = PVE::JSONSchema::pve_parse_startup_order($conf->{startup});
            $timeout = $order->{down} if $order->{down};
        }
        PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-stop');
    }

    # polls once per second until the VM is gone or $limit seconds have
    # passed; returns true if the limit was reached
    my $timed_out_waiting = sub {
        my ($limit) = @_;

        my $elapsed = 0;
        while (($elapsed < $limit) && check_running($vmid, $nocheck)) {
            $elapsed++;
            sleep 1;
        }

        return $elapsed >= $limit;
    };

    eval {
        if (!$shutdown) {
            mon_cmd($vmid, "quit");
        } elsif (defined($conf) && get_qga_key($conf, 'enabled')) {
            mon_cmd($vmid, "guest-shutdown", timeout => $timeout);
        } else {
            mon_cmd($vmid, "system_powerdown");
        }
    };
    my $err = $@;

    if ($err) {
        if (!check_running($vmid, $nocheck)) {
            warn "Unexpected: VM shutdown command failed, but VM not running anymore..\n";
            return;
        }

        die "VM quit/powerdown failed\n" if !$force;

        warn "VM quit/powerdown failed - terminating now with SIGTERM\n";
        kill 15, $pid;
    } else {
        $timeout = 60 if !defined($timeout);

        if (!$timed_out_waiting->($timeout)) {
            vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
            return;
        }

        die "VM quit/powerdown failed - got timeout\n" if !$force;

        warn "VM still running - terminating now with SIGTERM\n";
        kill 15, $pid;
    }

    # wait again
    $timeout = 10;

    if ($timed_out_waiting->($timeout)) {
        warn "VM still running - terminating now with SIGKILL\n";
        kill 9, $pid;
        sleep 1;
    }

    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
}
5802
# Stops a VM (or shuts it down when $shutdown is set).
#
# Note: use $nocheck to skip tests if VM configuration file exists.
# We need that when migrating VMs to other nodes (files already moved)
# Note: we set $keepActive in vzdump stop mode - volumes need to stay active
sub vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive, $migratedfrom) = @_;

    # a plain stop is forced unless the caller explicitly said otherwise
    if (!$shutdown) {
        $force //= 1;
    }

    if ($migratedfrom) {
        if (my $pid = check_running($vmid, $nocheck, $migratedfrom)) {
            kill 15, $pid;
        }

        my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);
        vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 0);
        return;
    }

    my $stop_locked = sub {
        _do_vm_stop($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive);
    };

    PVE::QemuConfig->lock_config($vmid, $stop_locked);
}
5823
# Requests a reboot of a running VM: creates a reboot request and shuts the VM
# down. If that fails, the request is cleared again and the error re-raised.
sub vm_reboot {
    my ($vmid, $timeout) = @_;

    my $reboot_locked = sub {
        eval {
            # only reboot if running, as qmeventd starts it again on a stop event
            if (check_running($vmid)) {
                create_reboot_request($vmid);

                my $storecfg = PVE::Storage::config();
                _do_vm_stop($storecfg, $vmid, undef, undef, $timeout, 1);
            }
        };
        if (my $err = $@) {
            # avoid that the next normal shutdown will be confused for a reboot
            clear_reboot_request($vmid);
            die $err;
        }
    };

    PVE::QemuConfig->lock_config($vmid, $reboot_locked);
}
5846
# Suspends a VM. Without $includestate the guest is only paused via the
# monitor 'stop' command. With $includestate the VM state is saved to a state
# volume, QEMU is quit and the config lock is set to 'suspended'.
#
# note: if using the statestorage parameter, the caller has to check privileges
sub vm_suspend {
    my ($vmid, $skiplock, $includestate, $statestorage) = @_;

    my ($conf, $path, $storecfg, $vmstate);

    my $prepare_locked = sub {
        $conf = PVE::QemuConfig->load_config($vmid);

        my $is_backing_up = PVE::QemuConfig->has_lock($conf, 'backup');
        if (!$skiplock && !$is_backing_up) {
            PVE::QemuConfig->check_lock($conf);
        }

        die "cannot suspend to disk during backup\n"
            if $is_backing_up && $includestate;

        if (!$includestate) {
            mon_cmd($vmid, "stop");
            return;
        }

        $conf->{lock} = 'suspending';
        my $date = strftime("%Y-%m-%d", localtime(time()));
        $storecfg = PVE::Storage::config();
        if (!$statestorage) {
            $statestorage = find_vmstate_storage($conf, $storecfg);
            # check permissions for the storage
            my $rpcenv = PVE::RPCEnvironment::get();
            if ($rpcenv->{type} ne 'cli') {
                my $authuser = $rpcenv->get_user();
                $rpcenv->check($authuser, "/storage/$statestorage", ['Datastore.AllocateSpace']);
            }
        }

        $vmstate = PVE::QemuConfig->__snapshot_save_vmstate(
            $vmid, $conf, "suspend-$date", $storecfg, $statestorage, 1);
        $path = PVE::Storage::path($storecfg, $vmstate);
        PVE::QemuConfig->write_config($vmid, $conf);
    };
    PVE::QemuConfig->lock_config($vmid, $prepare_locked);

    if ($includestate) {
        # save vm state
        PVE::Storage::activate_volumes($storecfg, [$vmstate]);

        eval {
            set_migration_caps($vmid, 1);
            mon_cmd($vmid, "savevm-start", statefile => $path);

            # poll until the state has been written completely
            while (1) {
                my $state = mon_cmd($vmid, "query-savevm");
                my $status = $state->{status};

                die "savevm not active\n" if !$status;

                if ($status eq 'active') {
                    sleep(1);
                    next;
                }

                if ($status eq 'completed') {
                    print "State saved, quitting\n";
                    last;
                }

                die "query-savevm failed with error '$state->{error}'\n"
                    if $status eq 'failed' && $state->{error};

                die "query-savevm returned status '$state->{status}'\n";
            }
        };
        my $err = $@;

        my $finish_locked = sub {
            $conf = PVE::QemuConfig->load_config($vmid);

            if ($err) {
                # cleanup, but leave suspending lock, to indicate something went wrong
                eval {
                    mon_cmd($vmid, "savevm-end");
                    PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
                    PVE::Storage::vdisk_free($storecfg, $vmstate);
                    delete $conf->@{qw(vmstate runningmachine runningcpu)};
                    PVE::QemuConfig->write_config($vmid, $conf);
                };
                warn $@ if $@;
                die $err;
            }

            PVE::QemuConfig->has_lock($conf, 'suspending')
                or die "lock changed unexpectedly\n";

            mon_cmd($vmid, "quit");
            $conf->{lock} = 'suspended';
            PVE::QemuConfig->write_config($vmid, $conf);
        };
        PVE::QemuConfig->lock_config($vmid, $finish_locked);
    }
}
5941
# Resumes a paused or suspended guest while holding the config lock.
# Does nothing if the guest is already running. A guest in state 'shutdown'
# is reset before it is continued.
sub vm_resume {
    my ($vmid, $skiplock, $nocheck) = @_;

    my $resume_locked = sub {
        my $res = mon_cmd($vmid, 'query-status');
        my $status = $res->{status} || '';

        return if $status eq 'running'; # job done, go home

        my $resume_cmd = $status eq 'suspended' ? 'system_wakeup' : 'cont';
        my $needs_reset = $status eq 'shutdown';

        if (!$nocheck) {
            my $conf = PVE::QemuConfig->load_config($vmid);

            if (!$skiplock && !PVE::QemuConfig->has_lock($conf, 'backup')) {
                PVE::QemuConfig->check_lock($conf);
            }
        }

        # required if a VM shuts down during a backup and we get a resume
        # request before the backup finishes for example
        mon_cmd($vmid, "system_reset") if $needs_reset;

        mon_cmd($vmid, $resume_cmd);
    };

    PVE::QemuConfig->lock_config($vmid, $resume_locked);
}
5972
# Send a key event to the VM while holding the config lock.
#
# $vmid: ID of the VM
# $skiplock: accepted but not used by this function
# $key: key name, passed verbatim to the monitor's 'sendkey' command
#
# Dies with the monitor output if the command printed anything.
sub vm_sendkey {
    my ($vmid, $skiplock, $key) = @_;

    my $send = sub {
        # the result is unused; presumably this is only done so that a missing
        # config fails early - TODO confirm
        my $conf = PVE::QemuConfig->load_config($vmid);

        # there is no qmp command, so we use the human monitor command
        my $hmp_output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "sendkey $key");
        die $hmp_output if $hmp_output ne '';
    };

    PVE::QemuConfig->lock_config($vmid, $send);
}
5985
# vzdump restore implementation
5987
# Return the name of the first member listed by 'tar tf' for the given archive.
#
# $archive: path to an existing regular file
#
# Dies if the file does not exist, tar cannot be started, or nothing is listed.
sub tar_archive_read_firstfile {
    my ($archive) = @_;

    if (! -f $archive) {
        die "ERROR: file '$archive' does not exist\n";
    }

    # try to detect archive type first
    my $tar_out;
    my $tar_pid = open($tar_out, '-|', 'tar', 'tf', $archive)
        or die "unable to open file '$archive'\n";

    # only the first line is of interest, so tar is terminated right after reading it
    my $first_entry = readline($tar_out);
    kill(15, $tar_pid);
    close($tar_out);

    die "ERROR: archive contaions no data\n" unless $first_entry;
    chomp($first_entry);

    return $first_entry;
}
6005
# Remove all volumes recorded in the stat file of a failed tar restore.
#
# $storecfg: Storage configuration
# $statfile: file with one 'vzdump:<key>:<volid>' entry per line
#
# Absolute paths are unlinked directly, everything else is freed through the
# storage layer. Failures are reported on STDERR and do not abort the cleanup.
# If the stat file cannot be opened, nothing is cleaned up.
sub tar_restore_cleanup {
    my ($storecfg, $statfile) = @_;

    print STDERR "starting cleanup\n";

    if (my $fd = IO::File->new($statfile, "r")) {
        while (defined(my $line = <$fd>)) {
            if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                my $volid = $2;
                eval {
                    if ($volid =~ m|^/|) {
                        # parentheses + 'or' are needed here: 'unlink $volid || die'
                        # would never look at the result of unlink
                        unlink($volid) or die "unlink failed - $!\n";
                    } else {
                        PVE::Storage::vdisk_free($storecfg, $volid);
                    }
                    print STDERR "temporary volume '$volid' sucessfuly removed\n";
                };
                print STDERR "unable to cleanup '$volid' - $@" if $@;
            } else {
                print STDERR "unable to parse line in statfile - $line";
            }
        }
        $fd->close();
    }
}
6031
# Dispatch a file based restore to the tar or VMA implementation.
#
# $archive: archive path, or '-' to read a VMA stream from STDIN
# $vmid, $user, $opts: passed through to the selected restore function;
#     $opts->{format} overrides the format reported for the archive
sub restore_file_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    # no archive info is looked up for STDIN, it is always handled as VMA
    if ($archive eq '-') {
        return restore_vma_archive($archive, $vmid, $user, $opts);
    }

    my $archive_info = PVE::Storage::archive_info($archive);
    my $compression = $archive_info->{compression};

    # try to detect archive format
    my $fmt = $opts->{format} // $archive_info->{format};

    return restore_tar_archive($archive, $vmid, $user, $opts) if $fmt eq 'tar';

    return restore_vma_archive($archive, $vmid, $user, $opts, $compression);
}
6049
# Helper to remove disks that will not be used after restore
#
# $storecfg: Storage configuration
# $vmid: ID of the VM that gets overwritten
# $oldconf: the VM config as it was before the restore
# $virtdev_hash: drives contained in the backup, keyed by config key
#
# Errors while freeing a volume are only warned about.
my $restore_cleanup_oldconf = sub {
    my ($storecfg, $vmid, $oldconf, $virtdev_hash) = @_;

    my $free_volume = sub {
        my ($volid) = @_;
        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        warn $@ if $@;
    };

    PVE::QemuConfig->foreach_volume($oldconf, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive, 1);

        # absolute paths are not managed by a storage, leave them alone
        my $volid = $drive->{file};
        return unless $volid && $volid !~ m|^/|;

        # only touch volumes owned by this VM
        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return unless $path && $owner && $owner == $vmid;

        # Note: only delete disk we want to restore
        # other volumes will become unused
        $free_volume->($volid) if $virtdev_hash->{$ds};
    });

    # delete vmstate files, after the restore we have no snapshots anymore
    for my $snap (values %{$oldconf->{snapshots}}) {
        $free_volume->($snap->{vmstate}) if $snap->{vmstate};
    }
};
6086
# Helper to parse vzdump backup device hints
#
# $rpcenv: Environment, used to check storage permissions
# $user: User ID, to check storage permissions
# $storecfg: Storage configuration
# $fh: the file handle for reading the configuration
# $devinfo: should contain device sizes for all backed-up devices
# $options: backup options; only the 'storage' override is read here
#
# Return: $virtdev_hash, updates $devinfo (add devname, virtdev, format, storeid)
#
# Dies if a '#qmdump#map' hint references a device missing from $devinfo.
my $parse_backup_hints = sub {
    my ($rpcenv, $user, $storecfg, $fh, $devinfo, $options) = @_;

    my $virtdev_hash = {};

    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\#qmdump\#map:(\S+):(\S+):(\S*):(\S*):$/) {
            my ($virtdev, $devname, $storeid, $format) = ($1, $2, $3, $4);
            die "archive does not contain data for drive '$virtdev'\n"
                if !$devinfo->{$devname};

            # an explicit target storage wins over the one recorded in the hint,
            # 'local' is the fallback in both cases
            if (defined($options->{storage})) {
                $storeid = $options->{storage} || 'local';
            } elsif (!$storeid) {
                $storeid = 'local';
            }
            $format = 'raw' if !$format;
            $devinfo->{$devname}->{devname} = $devname;
            $devinfo->{$devname}->{virtdev} = $virtdev;
            $devinfo->{$devname}->{format} = $format;
            $devinfo->{$devname}->{storeid} = $storeid;

            # check permission on storage
            if ($user ne 'root@pam') {
                $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace']);
            }

            $virtdev_hash->{$virtdev} = $devinfo->{$devname};
        } elsif ($line =~ m/^((?:ide|sata|scsi)\d+):\s*(.*)\s*$/) {
            my $virtdev = $1;
            my $drive = parse_drive($virtdev, $2);

            # cloudinit drives carry no hint line, so they are picked up
            # from the drive entry itself
            if (drive_is_cloudinit($drive)) {
                my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
                $storeid = $options->{storage} if defined ($options->{storage});
                my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
                my $format = qemu_img_format($scfg, $volname); # has 'raw' fallback

                $virtdev_hash->{$virtdev} = {
                    format => $format,
                    storeid => $storeid,
                    size => PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE,
                    is_cloudinit => 1,
                };
            }
        }
    }

    return $virtdev_hash;
};
6147
# Helper to allocate and activate all volumes required for a restore
#
# $storecfg: Storage configuration
# $virtdev_hash: as returned by parse_backup_hints()
# $vmid: ID of the VM the volumes are allocated for
#
# Each entry of $virtdev_hash gets its 'volid' set, and its 'format' is
# replaced by the storage default if the storage does not support it.
#
# Returns: { $virtdev => $volid }
my $restore_allocate_devices = sub {
    my ($storecfg, $virtdev_hash, $vmid) = @_;

    my $map = {};
    foreach my $virtdev (sort keys %$virtdev_hash) {
        my $d = $virtdev_hash->{$virtdev};
        # size is rounded up to a multiple of 1024 for the allocation
        my $alloc_size = int(($d->{size} + 1024 - 1)/1024);
        my $storeid = $d->{storeid};
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        # test if requested format is supported
        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        my $supported = grep { $_ eq $d->{format} } @$validFormats;
        $d->{format} = $defFormat if !$supported;

        # cloudinit volumes use a fixed name, everything else is named by the storage
        my $name;
        if ($d->{is_cloudinit}) {
            $name = "vm-$vmid-cloudinit";
            # storages with a 'path' get the format appended as extension
            if ($scfg->{path}) {
                $name .= ".$d->{format}";
            }
        }

        my $volid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $vmid, $d->{format}, $name, $alloc_size);

        print STDERR "new volume ID is '$volid'\n";
        $d->{volid} = $volid;

        PVE::Storage::activate_volumes($storecfg, [$volid]);

        $map->{$virtdev} = $volid;
    }

    return $map;
};
6191
# Translate one line of a backed-up VM config into the text for the restored config.
#
# $cookie: hash with a running 'netcount', used to number converted 'vlanN' entries
# $map: { $virtdev => $volid } of the volumes allocated for this restore
# $line: the raw config line
# $unique: if true, MAC addresses and the smbios1 UUID are regenerated
#
# Returns the replacement text, which is '' for lines that are dropped.
sub restore_update_config_line {
    my ($cookie, $map, $line, $unique) = @_;

    # these lines are never carried over into the restored config
    for my $dropped (qr/^\#qmdump\#/, qr/^\#vzdump\#/, qr/^lock:/, qr/^unused\d+:/, qr/^parent:/) {
        return '' if $line =~ $dropped;
    }

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');

    if ($line =~ m/^(vlan(\d+)):\s*(\S+)\s*$/) {
        # try to convert old 1.X settings
        my ($bridge_index, $ethcfg) = ($2, $3);

        my $converted = '';
        for my $devconfig (PVE::Tools::split_list($ethcfg)) {
            my ($model, $macaddr) = split(/\=/, $devconfig);
            if (!$macaddr || $unique) {
                $macaddr = PVE::Tools::random_ether_addr($dc->{mac_prefix});
            }

            my $netstr = print_net({
                model => $model,
                bridge => "vmbr$bridge_index",
                macaddr => $macaddr,
            });

            $converted .= "net$cookie->{netcount}: $netstr\n";
            $cookie->{netcount}++;
        }
        return $converted;
    }

    if (($line =~ m/^(net\d+):\s*(\S+)\s*$/) && $unique) {
        my ($id, $netstr) = ($1, $2);

        # only replace a MAC address that was actually configured
        my $net = parse_net($netstr);
        if ($net->{macaddr}) {
            $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix});
        }
        return "$id: " . print_net($net) . "\n";
    }

    if ($line =~ m/^((ide|scsi|virtio|sata|efidisk|tpmstate)\d+):\s*(\S+)\s*$/) {
        my ($virtdev, $value) = ($1, $3);
        my $di = parse_drive($virtdev, $value);

        # drives explicitly excluded from backup are kept as a comment only
        return "#$line" if defined($di->{backup}) && !$di->{backup};

        # nothing was allocated for this drive, keep the line untouched
        return $line if !$map->{$virtdev};

        delete $di->{format}; # format can change on restore
        $di->{file} = $map->{$virtdev};
        return "$virtdev: " . print_drive($di) . "\n";
    }

    if ($line =~ m/^vmgenid: (.*)/) {
        my $vmgenid = $1;
        # always generate a new vmgenid if there was a valid one setup
        $vmgenid = generate_uuid() if $vmgenid ne '0';
        return "vmgenid: $vmgenid\n";
    }

    if (($line =~ m/^(smbios1: )(.*)/) && $unique) {
        my ($prefix, $settings) = ($1, $2);

        my ($uuid, $uuid_str);
        UUID::generate($uuid);
        UUID::unparse($uuid, $uuid_str);

        my $smbios1 = parse_smbios1($settings);
        $smbios1->{uuid} = $uuid_str;
        return $prefix . print_smbios1($smbios1) . "\n";
    }

    return $line;
}
6260
# Helper to deactivate every volume that got allocated during a restore
#
# $storecfg: Storage configuration
# $devinfo: per-device hash; entries without a 'volid' are ignored
my $restore_deactivate_volumes = sub {
    my ($storecfg, $devinfo) = @_;

    my @allocated = grep { $_ } map { $_->{volid} } values %$devinfo;

    PVE::Storage::deactivate_volumes($storecfg, \@allocated);
};
6272
# Helper to remove every volume that got allocated during a failed restore
#
# $storecfg: Storage configuration
# $devinfo: per-device hash; entries without a 'volid' are skipped
#
# Absolute paths are unlinked directly, everything else is freed through the
# storage layer. Failures are reported on STDERR and do not abort the cleanup.
my $restore_destroy_volumes = sub {
    my ($storecfg, $devinfo) = @_;

    foreach my $devname (keys %$devinfo) {
        my $volid = $devinfo->{$devname}->{volid};
        next if !$volid;
        eval {
            if ($volid =~ m|^/|) {
                # parentheses + 'or' are needed here: 'unlink $volid || die'
                # would never look at the result of unlink
                unlink($volid) or die "unlink failed - $!\n";
            } else {
                PVE::Storage::vdisk_free($storecfg, $volid);
            }
            print STDERR "temporary volume '$volid' sucessfuly removed\n";
        };
        print STDERR "unable to cleanup '$volid' - $@" if $@;
    }
};
6290
# Collect the 'images' volumes the storage layer lists for a VM.
#
# $cfg: Storage configuration
# $vmid: passed to the storage listing as VM filter
#
# Returns: { $volid => $item }, where each listed item additionally gets its
# 'path' set. Items without volid or size are left out.
sub scan_volids {
    my ($cfg, $vmid) = @_;

    my $per_storage = PVE::Storage::vdisk_list($cfg, undef, $vmid, undef, 'images');

    my %found;
    for my $volumes (values %$per_storage) {
        for my $vol (@$volumes) {
            next unless $vol->{volid} && $vol->{size};

            # scalar assignment on purpose, only the path itself is stored
            my $vol_path = PVE::Storage::path($cfg, $vol->{volid});
            $vol->{path} = $vol_path;

            $found{$vol->{volid}} = $vol;
        }
    }

    return \%found;
}
6307
# Synchronize the disk entries of a VM config with the volumes found on storage.
#
# $vmid: ID of the VM, used for messages and to recognize state volumes
# $conf: VM config, modified in place
# $volid_hash: { $volid => $info } with at least 'path' and 'size' per volume
#
# Updates drive sizes, drops 'unusedX' entries whose volume is referenced
# elsewhere and adds unreferenced volumes as unused.
#
# Returns 1 if $conf was changed, undef otherwise.
sub update_disk_config {
    my ($vmid, $conf, $volid_hash) = @_;

    my $log_prefix = "VM $vmid";
    my $modified;

    # Note: it is allowed to define multiple storages with same path (alias), so
    # we need to check both 'volid' and real 'path' (two different volid can point
    # to the same path).
    my %seen_volid; # used and unused disks
    my %seen_path;

    # update size info
    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($key, $drive) = @_;

        my $volid = $drive->{file};
        return unless $volid;

        my $info = $volid_hash->{$volid};

        # mark volid as "in-use" for next step
        $seen_volid{$volid} = 1;
        if ($info) {
            my $vol_path = $info->{path};
            $seen_path{$vol_path} = 1 if $vol_path;
        }

        return if drive_is_cdrom($drive);
        return unless $info;

        my ($new_drive, $msg) = PVE::QemuServer::Drive::update_disksize($drive, $info->{size});
        return unless defined($new_drive);

        $modified = 1;
        $conf->{$key} = print_drive($new_drive);
        print "$log_prefix ($key): $msg\n";
    });

    # remove 'unusedX' entry if volume is used
    PVE::QemuConfig->foreach_unused_volume($conf, sub {
        my ($key, $drive) = @_;

        my $volid = $drive->{file};
        return unless $volid;

        my $info = $volid_hash->{$volid};
        my $vol_path = $info ? $info->{path} : undef;

        my $in_use = $seen_volid{$volid} || ($vol_path && $seen_path{$vol_path});
        if ($in_use) {
            print "$log_prefix remove entry '$key', its volume '$volid' is in use\n";
            $modified = 1;
            delete $conf->{$key};
        }

        # also catches a second 'unusedX' entry pointing to the same volume
        $seen_volid{$volid} = 1;
        $seen_path{$vol_path} = 1 if $vol_path;
    });

    for my $volid (sort keys %$volid_hash) {
        next if $volid =~ m/vm-$vmid-state-/;
        next if $seen_volid{$volid};

        my $vol_path = $volid_hash->{$volid}->{path};
        next unless $vol_path; # just to be sure
        next if $seen_path{$vol_path};

        $modified = 1;
        my $unused_key = PVE::QemuConfig->add_unused_volume($conf, $volid);
        print "$log_prefix add unreferenced volume '$volid' as '$unused_key' to config\n";
        $seen_path{$vol_path} = 1; # avoid to add more than once (aliases)
    }

    return $modified;
}
6381
# Rescan storages and update the disk entries of one VM or of all VMs.
#
# $vmid: VM to update; if undefined, every VM from config_list() is handled
# $nolock: if true, the config is updated without taking the config lock
# $dryrun: if true, changes are computed and printed, but not written
sub rescan {
    my ($vmid, $nolock, $dryrun) = @_;

    my $cfg = PVE::Storage::config();

    print "rescan volumes...\n";
    my $volid_hash = scan_volids($cfg, $vmid);

    my $update_one = sub {
        my ($id) = @_;

        my $conf = PVE::QemuConfig->load_config($id);

        PVE::QemuConfig->check_lock($conf);

        # only hand over the volumes that belong to this VM
        my @owned = grep {
            my $owner = $volid_hash->{$_}->{vmid};
            $owner && $owner == $id;
        } keys %$volid_hash;
        my %vm_volids;
        @vm_volids{@owned} = @{$volid_hash}{@owned};

        my $changed = update_disk_config($id, $conf, \%vm_volids);

        PVE::QemuConfig->write_config($id, $conf) if $changed && !$dryrun;
    };

    my @targets = defined($vmid) ? ($vmid) : keys %{ config_list() };

    for my $id (@targets) {
        if ($nolock) {
            $update_one->($id);
        } else {
            PVE::QemuConfig->lock_config($id, $update_one, $id);
        }
    }
}
6425
# Restore a VM from a Proxmox Backup Server snapshot.
#
# $archive: volume ID of the backup; must parse as vtype 'backup' with format 'pbs-vm'
# $vmid: ID of the VM to create or overwrite
# $user: User ID, to check storage permissions
# $options: restore options; 'live', 'pool' and 'unique' are read here, the hash
#     is also passed on to the backup hint parser
#
# On error all volumes allocated so far are deactivated and destroyed before
# the error is re-thrown. With $options->{live} the VM is started from the
# backup and keeps its 'create' lock until the live-restore finished.
sub restore_proxmox_backup_archive {
    my ($archive, $vmid, $user, $options) = @_;

    my $storecfg = PVE::Storage::config();

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($archive);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

    my $fingerprint = $scfg->{fingerprint};
    my $keyfile = PVE::Storage::PBSPlugin::pbs_encryption_key_file_name($storecfg, $storeid);

    my $repo = PVE::PBSClient::get_repository($scfg);

    # This is only used for `pbs-restore` and the QEMU PBS driver (live-restore)
    my $password = PVE::Storage::PBSPlugin::pbs_get_password($scfg, $storeid);
    local $ENV{PBS_PASSWORD} = $password;
    local $ENV{PBS_FINGERPRINT} = $fingerprint if defined($fingerprint);

    my ($vtype, $pbs_backup_name, undef, undef, undef, undef, $format) =
        PVE::Storage::parse_volname($storecfg, $archive);

    die "got unexpected vtype '$vtype'\n" if $vtype ne 'backup';

    die "got unexpected backup format '$format'\n" if $format ne 'pbs-vm';

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;
    mkpath $tmpdir;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    # Note: $oldconf is undef if VM does not exists
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my $rpcenv = PVE::RPCEnvironment::get();
    my $devinfo = {};

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        my $cfgfn = "$tmpdir/qemu-server.conf";
        my $firewall_config_fn = "$tmpdir/fw.conf";
        my $index_fn = "$tmpdir/index.json";

        my $cmd = "restore";

        my $param = [$pbs_backup_name, "index.json", $index_fn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);
        my $index = PVE::Tools::file_get_contents($index_fn);
        $index = decode_json($index);

        # collect the size of every disk image listed in the index
        foreach my $info (@{$index->{files}}) {
            # the dots are escaped so that only a literal '.img.fidx' suffix matches
            if ($info->{filename} =~ m/^(drive-\S+)\.img\.fidx$/) {
                my $devname = $1;
                if ($info->{size} =~ m/^(\d+)$/) { # untaint size
                    $devinfo->{$devname}->{size} = $1;
                } else {
                    die "unable to parse file size in 'index.json' - got '$info->{size}'\n";
                }
            }
        }

        my $is_qemu_server_backup = scalar(
            grep { $_->{filename} eq 'qemu-server.conf.blob' } @{$index->{files}}
        );
        if (!$is_qemu_server_backup) {
            die "backup does not look like a qemu-server backup (missing 'qemu-server.conf' file)\n";
        }
        my $has_firewall_config = scalar(grep { $_->{filename} eq 'fw.conf.blob' } @{$index->{files}});

        $param = [$pbs_backup_name, "qemu-server.conf", $cfgfn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

        if ($has_firewall_config) {
            $param = [$pbs_backup_name, "fw.conf", $firewall_config_fn];
            PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($firewall_config_fn, "${pve_firewall_dir}/$vmid.fw");
        }

        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        my $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $storecfg, $fh, $devinfo, $options);

        # fixme: rate limit?

        # create empty/temp config
        PVE::Tools::file_set_contents($conffile, "memory: 128\nlock: create");

        $restore_cleanup_oldconf->($storecfg, $vmid, $oldconf, $virtdev_hash) if $oldconf;

        # allocate volumes
        my $map = $restore_allocate_devices->($storecfg, $virtdev_hash, $vmid);

        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            # this fails if storage is unavailable
            my $volid = $d->{volid};
            my $path = PVE::Storage::path($storecfg, $volid);

            # for live-restore we only want to preload the efidisk and TPM state
            next if $options->{live} && $virtdev ne 'efidisk0' && $virtdev ne 'tpmstate0';

            my $pbs_restore_cmd = [
                '/usr/bin/pbs-restore',
                '--repository', $repo,
                $pbs_backup_name,
                "$d->{devname}.img.fidx",
                $path,
                '--verbose',
            ];

            push @$pbs_restore_cmd, '--format', $d->{format} if $d->{format};
            push @$pbs_restore_cmd, '--keyfile', $keyfile if -e $keyfile;

            if (PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $volid)) {
                push @$pbs_restore_cmd, '--skip-zero';
            }

            my $dbg_cmdstring = PVE::Tools::cmd2string($pbs_restore_cmd);
            print "restore proxmox backup image: $dbg_cmdstring\n";
            run_command($pbs_restore_cmd);
        }

        # second pass over the backed-up config, this time to build the new one
        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $options->{unique},
            );
        }

        $fh->close();
    };
    my $err = $@;

    # for a successful live-restore the volumes have to stay active
    if ($err || !$options->{live}) {
        $restore_deactivate_volumes->($storecfg, $devinfo);
    }

    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($storecfg, $devinfo);
        die $err;
    }

    if ($options->{live}) {
        # keep lock during live-restore
        $new_conf_raw .= "\nlock: create";
    }

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $options->{pool}) if $options->{pool};

    if ($options->{live}) {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "got signal ($!) - abort\n"; };

        my $conf = PVE::QemuConfig->load_config($vmid);
        die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf);

        # these special drives are already restored before start
        delete $devinfo->{'drive-efidisk0'};
        delete $devinfo->{'drive-tpmstate0-backup'};
        pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $repo, $keyfile, $pbs_backup_name);

        PVE::QemuConfig->remove_lock($vmid, "create");
    }
}
6628
# Start a VM directly from a PBS snapshot and stream the data to its disks.
#
# $vmid: ID of the VM
# $conf: VM config, used to look up the target drive of every restored disk
# $storecfg: Storage configuration
# $restored_disks: hash keyed by block device name ('drive-<config key>')
# $repo: PBS repository
# $keyfile: path to the encryption key file, only used if it exists
# $snap: name of the PBS snapshot
#
# Dies with "live-restore failed" after stopping the VM if anything goes
# wrong once the VM start was attempted.
sub pbs_live_restore {
    my ($vmid, $conf, $storecfg, $restored_disks, $repo, $keyfile, $snap) = @_;

    print "starting VM for live-restore\n";
    print "repository: '$repo', snapshot: '$snap'\n";

    my $pbs_backing = {};
    for my $ds (keys %$restored_disks) {
        # never continue with a stale or undefined capture from some earlier match
        my ($confname) = $ds =~ m/^drive-(.*)$/
            or die "unexpected device name '$ds' for live-restore\n";

        $pbs_backing->{$confname} = {
            repository => $repo,
            snapshot => $snap,
            archive => "$ds.img.fidx",
        };
        $pbs_backing->{$confname}->{keyfile} = $keyfile if -e $keyfile;

        my $drive = parse_drive($confname, $conf->{$confname});
        print "restoring '$ds' to '$drive->{file}'\n";
    }

    eval {
        # make sure HA doesn't interrupt our restore by stopping the VM
        if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
            run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
        }

        # start VM with backing chain pointing to PBS backup, environment vars for PBS driver
        # in QEMU (PBS_PASSWORD and PBS_FINGERPRINT) are already set by our caller
        vm_start_nolock($storecfg, $vmid, $conf, {paused => 1, 'pbs-backing' => $pbs_backing}, {});

        my $qmeventd_fd = register_qmeventd_handle($vmid);

        # begin streaming, i.e. data copy from PBS to target disk for every vol,
        # this will effectively collapse the backing image chain consisting of
        # [target <- alloc-track -> PBS snapshot] to just [target] (alloc-track
        # removes itself once all backing images vanish with 'auto-remove=on')
        my $jobs = {};
        for my $ds (sort keys %$restored_disks) {
            my $job_id = "restore-$ds";
            mon_cmd($vmid, 'block-stream',
                'job-id' => $job_id,
                device => "$ds",
            );
            $jobs->{$job_id} = {};
        }

        # the VM was started paused, let it run while the jobs are monitored
        mon_cmd($vmid, 'cont');
        qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');

        print "restore-drive jobs finished successfully, removing all tracking block devices"
            ." to disconnect from Proxmox Backup Server\n";

        for my $ds (sort keys %$restored_disks) {
            mon_cmd($vmid, 'blockdev-del', 'node-name' => "$ds-pbs");
        }

        close($qmeventd_fd);
    };

    my $err = $@;

    if ($err) {
        warn "An error occured during live-restore: $err\n";
        _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
        die "live-restore failed\n";
    }
}
6698
# Restore a VM from a VMA archive by piping it through 'vma extract'.
#
# $archive: path of the archive, or '-' to read it from STDIN
# $vmid: ID of the VM to create or overwrite
# $user: User ID, to check storage permissions
# $opts: restore options; 'bwlimit', 'pool' and 'unique' are read here, the
#     hash is also passed on to the backup hint parser
# $comp: compression of the archive, if any
#
# The device mapping is sent to 'vma extract' through a fifo once the config
# contained in the archive was extracted. On error all volumes allocated so
# far are deactivated and destroyed before the error is re-thrown.
sub restore_vma_archive {
    my ($archive, $vmid, $user, $opts, $comp) = @_;

    my $readfrom = $archive;

    my $cfg = PVE::Storage::config();
    my $commands = [];
    my $bwlimit = $opts->{bwlimit};

    # every command appended to the pipeline makes the next one read from STDIN
    my $dbg_cmdstring = '';
    my $add_pipe = sub {
        my ($cmd) = @_;
        push @$commands, $cmd;
        $dbg_cmdstring .= ' | ' if length($dbg_cmdstring);
        $dbg_cmdstring .= PVE::Tools::cmd2string($cmd);
        $readfrom = '-';
    };

    my $input = undef;
    if ($archive eq '-') {
        $input = '<&STDIN';
    } else {
        # If we use a backup from a PVE defined storage we also consider that
        # storage's rate limit:
        my (undef, $volid) = PVE::Storage::path_to_volume_id($cfg, $archive);
        if (defined($volid)) {
            my ($sid, undef) = PVE::Storage::parse_volume_id($volid);
            my $readlimit = PVE::Storage::get_bandwidth_limit('restore', [$sid], $bwlimit);
            if ($readlimit) {
                print STDERR "applying read rate limit: $readlimit\n";
                my $cstream = ['cstream', '-t', $readlimit*1024, '--', $readfrom];
                $add_pipe->($cstream);
            }
        }
    }

    if ($comp) {
        my $info = PVE::Storage::decompressor_info('vma', $comp);
        # build a new command array instead of appending to the one inside $info
        my $cmd = [ @{$info->{decompressor}}, $readfrom ];
        $add_pipe->($cmd);
    }

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { warn "got interrupt - ignored\n"; };

    my $mapfifo = "/var/tmp/vzdumptmp$$.fifo";
    if (!POSIX::mkfifo($mapfifo, 0600)) {
        # an already existing fifo is still usable, anything else would make
        # the open() below create a regular file instead
        my $mkfifo_err = $!;
        die "unable to create fifo '$mapfifo' - $mkfifo_err\n" if ! -p $mapfifo;
    }
    my $fifofh;
    my $openfifo = sub { open($fifofh, '>', $mapfifo) or die $! };

    $add_pipe->(['vma', 'extract', '-v', '-r', $mapfifo, $readfrom, $tmpdir]);

    my $oldtimeout;
    my $timeout = 5;

    my $devinfo = {};

    my $rpcenv = PVE::RPCEnvironment::get();

    my $conffile = PVE::QemuConfig->config_file($vmid);

    # Note: $oldconf is undef if VM does not exist
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my %storage_limits;

    # allocates the target volumes, writes the device map to the fifo and
    # builds the new config; called once the archive header was parsed
    my $print_devmap = sub {
        my $cfgfn = "$tmpdir/qemu-server.conf";

        # we can read the config - that is already extracted
        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        my $fwcfgfn = "$tmpdir/qemu-server.fw";
        if (-f $fwcfgfn) {
            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($fwcfgfn, "${pve_firewall_dir}/$vmid.fw");
        }

        my $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $cfg, $fh, $devinfo, $opts);

        foreach my $info (values %{$virtdev_hash}) {
            my $storeid = $info->{storeid};
            next if defined($storage_limits{$storeid});

            my $limit = PVE::Storage::get_bandwidth_limit('restore', [$storeid], $bwlimit) // 0;
            print STDERR "rate limit for storage $storeid: $limit KiB/s\n" if $limit;
            $storage_limits{$storeid} = $limit * 1024;
        }

        foreach my $devname (keys %$devinfo) {
            die "found no device mapping information for device '$devname'\n"
                if !$devinfo->{$devname}->{virtdev};
        }

        # create empty/temp config
        if ($oldconf) {
            PVE::Tools::file_set_contents($conffile, "memory: 128\n");
            $restore_cleanup_oldconf->($cfg, $vmid, $oldconf, $virtdev_hash);
        }

        # allocate volumes
        my $map = $restore_allocate_devices->($cfg, $virtdev_hash, $vmid);

        # print restore information to $fifofh
        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            my $storeid = $d->{storeid};
            my $volid = $d->{volid};

            my $map_opts = '';
            if (my $limit = $storage_limits{$storeid}) {
                $map_opts .= "throttling.bps=$limit:throttling.group=$storeid:";
            }

            my $write_zeros = 1;
            if (PVE::Storage::volume_has_feature($cfg, 'sparseinit', $volid)) {
                $write_zeros = 0;
            }

            my $path = PVE::Storage::path($cfg, $volid);

            print $fifofh "${map_opts}format=$d->{format}:${write_zeros}:$d->{devname}=$path\n";

            print "map '$d->{devname}' to '$path' (write zeros = ${write_zeros})\n";
        }

        # second pass over the backed-up config, this time to build the new one
        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $fh->close();
    };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };
        local $SIG{ALRM} = sub { die "got timeout\n"; };

        $oldtimeout = alarm($timeout);

        my $parser = sub {
            my $line = shift;

            print "$line\n";

            if ($line =~ m/^DEV:\sdev_id=(\d+)\ssize:\s(\d+)\sdevname:\s(\S+)$/) {
                my ($dev_id, $size, $devname) = ($1, $2, $3);
                $devinfo->{$devname} = { size => $size, dev_id => $dev_id };
            } elsif ($line =~ m/^CTIME: /) {
                # we correctly received the vma config, so we can disable
                # the timeout now for disk allocation (set to 10 minutes, so
                # that we always timeout if something goes wrong)
                alarm(600);
                $print_devmap->();
                print $fifofh "done\n";
                # re-arm whatever alarm was pending before this restore
                my $tmp = $oldtimeout || 0;
                $oldtimeout = undef;
                alarm($tmp);
                close($fifofh);
                $fifofh = undef;
            }
        };

        print "restore vma archive: $dbg_cmdstring\n";
        run_command($commands, input => $input, outfunc => $parser, afterfork => $openfifo);
    };
    my $err = $@;

    alarm($oldtimeout) if $oldtimeout;

    $restore_deactivate_volumes->($cfg, $devinfo);

    close($fifofh) if $fifofh;
    unlink $mapfifo;
    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($cfg, $devinfo);
        die $err;
    }

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $opts->{pool}) if $opts->{pool};
}
6914
# Restore a VM from a legacy tar archive using the 'qmextract' helper.
#
# $archive: path of the archive, or '-' to read it from STDIN
# $vmid: ID of the VM to create or overwrite
# $user: User ID, exported to the helper as VZDUMP_USER
# $opts: restore options; 'storage', 'pool', 'prealloc', 'info' and 'unique'
#     are read here
#
# On error the volumes recorded in the stat file are removed (unless 'info'
# is set), the temporary directory is deleted and the error is re-thrown.
sub restore_tar_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if ($archive ne '-') {
        my $firstfile = tar_archive_read_firstfile($archive);
        die "ERROR: file '$archive' does not look like a QemuServer vzdump backup\n"
            if $firstfile ne 'qemu-server.conf';
    }

    my $storecfg = PVE::Storage::config();

    # avoid zombie disks when restoring over an existing VM -> cleanup first
    # pass keep_empty_config=1 to keep the config (thus VMID) reserved for us
    # skiplock=1 because qmrestore has set the 'create' lock itself already
    my $vmcfgfn = PVE::QemuConfig->config_file($vmid);
    destroy_vm($storecfg, $vmid, 1, { lock => 'restore' }) if -f $vmcfgfn;

    my $tocmd = "/usr/lib/qemu-server/qmextract";

    $tocmd .= " --storage " . PVE::Tools::shellquote($opts->{storage}) if $opts->{storage};
    $tocmd .= " --pool " . PVE::Tools::shellquote($opts->{pool}) if $opts->{pool};
    $tocmd .= ' --prealloc' if $opts->{prealloc};
    $tocmd .= ' --info' if $opts->{info};

    # tar option "xf" does not autodetect compression when read from STDIN,
    # so we pipe to zcat
    my $cmd = "zcat -f|tar xf " . PVE::Tools::shellquote($archive) . " " .
        PVE::Tools::shellquote("--to-command=$tocmd");

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    mkpath $tmpdir;

    local $ENV{VZDUMP_TMPDIR} = $tmpdir;
    local $ENV{VZDUMP_VMID} = $vmid;
    local $ENV{VZDUMP_USER} = $user;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    my $new_conf_raw = '';

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        if ($archive eq '-') {
            print "extracting archive from STDIN\n";
            run_command($cmd, input => "<&STDIN");
        } else {
            print "extracting archive '$archive'\n";
            run_command($cmd);
        }

        # note: this only leaves the eval block, not the function
        return if $opts->{info};

        # read new mapping
        my $map = {};
        my $statfile = "$tmpdir/qmrestore.stat";
        if (my $fd = IO::File->new($statfile, "r")) {
            while (defined (my $line = <$fd>)) {
                if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                    $map->{$1} = $2 if $1;
                } else {
                    print STDERR "unable to parse line in statfile - $line\n";
                }
            }
            $fd->close();
        }

        my $confsrc = "$tmpdir/qemu-server.conf";

        my $srcfd = IO::File->new($confsrc, "r") || die "unable to open file '$confsrc'\n";

        my $cookie = { netcount => 0 };
        while (defined (my $line = <$srcfd>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $srcfd->close();
    };
    if (my $err = $@) {
        tar_restore_cleanup($storecfg, "$tmpdir/qmrestore.stat") if !$opts->{info};
        # the stat file was consumed above, do not leave the directory behind
        rmtree $tmpdir;
        die $err;
    }

    rmtree $tmpdir;

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;
}
7022
# Call $func once, in sorted order, for every distinct storage ID referenced by
# a non-CD-ROM volume of $conf. The callback receives the storage ID.
sub foreach_storage_used_by_vm {
    my ($conf, $func) = @_;

    my %seen;

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        # CD-ROM drives do not count as storage usage here
        return if drive_is_cdrom($drive);

        # noerr=1: volumes that are not in 'storage:volume' form yield no ID
        my ($sid, $volname) = PVE::Storage::parse_volume_id($drive->{file}, 1);
        $seen{$sid} = $sid if $sid;
    });

    for my $sid (sort keys %seen) {
        $func->($sid);
    }
}
7042
# storage types for which this module reports QEMU-side snapshots
my $qemu_snap_storage = { rbd => 1 };

# Returns 1 when snapshots of $volid are to be done with QEMU, and an
# empty/undef result otherwise. Dies if the volume's storage is not configured.
sub do_snapshots_with_qemu {
    my ($storecfg, $volid, $deviceid) = @_;

    # never for the TPM state device
    return if $deviceid =~ m/tpmstate0/;

    my $storage_name = PVE::Storage::parse_volume_id($volid);
    my $scfg = $storecfg->{ids}->{$storage_name};
    die "could not find storage '$storage_name'\n" if !defined($scfg);

    # listed storage types qualify, unless the 'krbd' flag is set
    return 1 if $qemu_snap_storage->{$scfg->{type}} && !$scfg->{krbd};

    # qcow2/qed images, recognised by file extension
    return 1 if $volid =~ m/\.(qcow2|qed)$/;

    return;
}
7065
# Ping the guest agent of VM $vmid (3 second timeout).
# Returns 1 if the ping succeeded, 0 otherwise; a warning is emitted on
# failure unless $nowarn is true.
sub qga_check_running {
    my ($vmid, $nowarn) = @_;

    eval { mon_cmd($vmid, "guest-ping", timeout => 3); };
    return 1 if !$@;

    warn "Qemu Guest Agent is not running - $@" if !$nowarn;
    return 0;
}
7076
# Convert the volumes of VM $vmid into base volumes.
#
# Every non-CD-ROM volume of $conf (or only the drive named $disk, if given)
# whose storage reports the 'template' feature is turned into a base volume;
# the config is updated and written after each converted drive.
sub template_create {
    my ($vmid, $conf, $disk) = @_;

    my $storecfg = PVE::Storage::config();

    my $convert_drive = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);
        return if $disk && $ds ne $disk;

        my $volid = $drive->{file};
        return if !PVE::Storage::volume_has_feature($storecfg, 'template', $volid);

        $drive->{file} = PVE::Storage::vdisk_create_base($storecfg, $volid);
        $conf->{$ds} = print_drive($drive);
        PVE::QemuConfig->write_config($vmid, $conf);
    };

    PVE::QemuConfig->foreach_volume($conf, $convert_drive);
}
7097
# Translate an 'iscsi://PORTAL/TARGET/LUN' URL into the comma-separated
# 'file.*' option string built below (driver, transport, initiator name,
# portal, target, LUN, raw format).
#
# Dies if $path does not have the expected iscsi:// form.
sub convert_iscsi_path {
    my ($path) = @_;

    if ($path =~ m|^iscsi://([^/]+)/([^/]+)/(.+)$|) {
        my $portal = $1;
        my $target = $2;
        my $lun = $3;

        my $initiator_name = get_initiator_name();

        return "file.driver=iscsi,file.transport=tcp,file.initiator-name=$initiator_name,".
            "file.portal=$portal,file.target=$target,file.lun=$lun,driver=raw";
    }

    die "cannot convert iscsi path '$path', unknown format\n";
}
7114
# Copy the contents of one image to an already existing destination volume
# using 'qemu-img convert -n'.
#
# Parameters:
#   $src_volid           - source volume ID, or a path to an existing file
#   $dst_volid           - destination volume ID (must be a valid volid)
#   $size                - total size, only used for the progress output
#   $snapname            - optional snapshot of the source to read from
#   $is_zero_initialized - if true (and the destination is not iSCSI) the
#                          destination is addressed with the 'zeroinit:' prefix
#
# Dies if source or destination cannot be resolved, or with "copy failed: ..."
# if qemu-img fails.
sub qemu_img_convert {
    my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized) = @_;

    my $storecfg = PVE::Storage::config();
    my ($src_storeid, $src_volname) = PVE::Storage::parse_volume_id($src_volid, 1);
    my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid, 1);

    die "destination '$dst_volid' is not a valid volid for qemu-img convert\n" if !$dst_storeid;

    my $cachemode;
    my $src_path;
    my $src_is_iscsi = 0;
    my $src_format;

    if ($src_storeid) {
        # source is a storage volume
        PVE::Storage::activate_volumes($storecfg, [$src_volid], $snapname);
        my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
        $src_format = qemu_img_format($src_scfg, $src_volname);
        $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
        $src_is_iscsi = ($src_path =~ m|^iscsi://|);
        $cachemode = 'none' if $src_scfg->{type} eq 'zfspool';
    } elsif (-f $src_volid) {
        # source is a plain file; derive the format from its extension, if any
        $src_path = $src_volid;
        if ($src_path =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/) {
            $src_format = $1;
        }
    }

    die "source '$src_volid' is not a valid volid nor path for qemu-img convert\n" if !$src_path;

    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
    my $dst_format = qemu_img_format($dst_scfg, $dst_volname);
    my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
    my $dst_is_iscsi = ($dst_path =~ m|^iscsi://|);

    my $cmd = [];
    push @$cmd, '/usr/bin/qemu-img', 'convert', '-p', '-n';
    push @$cmd, '-l', "snapshot.name=$snapname"
        if $snapname && $src_format && $src_format eq "qcow2";
    push @$cmd, '-t', 'none' if $dst_scfg->{type} eq 'zfspool';
    push @$cmd, '-T', $cachemode if defined($cachemode);

    # iSCSI endpoints are passed as option strings instead of path + format
    if ($src_is_iscsi) {
        push @$cmd, '--image-opts';
        $src_path = convert_iscsi_path($src_path);
    } elsif ($src_format) {
        push @$cmd, '-f', $src_format;
    }

    if ($dst_is_iscsi) {
        push @$cmd, '--target-image-opts';
        $dst_path = convert_iscsi_path($dst_path);
    } else {
        push @$cmd, '-O', $dst_format;
    }

    push @$cmd, $src_path;

    if (!$dst_is_iscsi && $is_zero_initialized) {
        push @$cmd, "zeroinit:$dst_path";
    } else {
        push @$cmd, $dst_path;
    }

    # turn qemu-img's "(NN.NN/100%)" progress lines into a human readable line
    my $parser = sub {
        my $line = shift;
        if ($line =~ m/\((\S+)\/100\%\)/) {
            my $percent = $1;
            my $transferred = int($size * $percent / 100);
            my $total_h = render_bytes($size, 1);
            my $transferred_h = render_bytes($transferred, 1);

            print "transferred $transferred_h of $total_h ($percent%)\n";
        }
    };

    eval { run_command($cmd, timeout => undef, outfunc => $parser); };
    my $err = $@;
    die "copy failed: $err" if $err;
}
7196
# Determine the image format of $volname on the storage described by $scfg.
# Only storages with a 'path' setting can yield a format taken from the file
# extension; everything else is reported as "raw".
sub qemu_img_format {
    my ($scfg, $volname) = @_;

    return "raw" if !$scfg->{path};

    my ($ext) = $volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/;
    return $ext // "raw";
}
7206
# Start a QMP 'drive-mirror' job for drive $drive of VM $vmid towards
# $dst_volid and then hand over to qemu_drive_mirror_monitor().
#
# $dst_volid is either an 'nbd:' target (used as is) or a volume ID that is
# resolved to a path. $jobs (hash ref, created if missing) gets an entry for
# the new job. $bwlimit, if defined, is multiplied by 1024 and passed as
# 'speed'; $src_bitmap, if defined, switches to an incremental sync.
# Dies with "mirroring error: ..." if the job cannot be started; already
# tracked jobs are cancelled in that case.
sub qemu_drive_mirror {
    my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit, $src_bitmap) = @_;

    $jobs = {} if !$jobs;

    my $device = "drive-$drive";
    $jobs->{$device} = {};

    my ($qemu_target, $format);
    if ($dst_volid =~ /^nbd:/) {
        ($qemu_target, $format) = ($dst_volid, "nbd");
    } else {
        my $storecfg = PVE::Storage::config();
        my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid);

        my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

        $format = qemu_img_format($dst_scfg, $dst_volname);

        my $dst_path = PVE::Storage::path($storecfg, $dst_volid);

        $qemu_target = $is_zero_initialized ? "zeroinit:$dst_path" : $dst_path;
    }

    my $opts = {
        timeout => 10,
        device => $device,
        mode => "existing",
        sync => "full",
        target => $qemu_target,
    };
    $opts->{format} = $format if $format;

    if (defined($src_bitmap)) {
        $opts->{sync} = 'incremental';
        $opts->{bitmap} = $src_bitmap;
        print "drive mirror re-using dirty bitmap '$src_bitmap'\n";
    }

    if (defined($bwlimit)) {
        $opts->{speed} = $bwlimit * 1024;
        print "drive mirror is starting for drive-$drive with bandwidth limit: ${bwlimit} KB/s\n";
    } else {
        print "drive mirror is starting for drive-$drive\n";
    }

    # if a job already runs for this device we get an error, catch it for cleanup
    eval { mon_cmd($vmid, "drive-mirror", %$opts); };
    if (my $err = $@) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@;
        die "mirroring error: $err\n";
    }

    qemu_drive_mirror_monitor($vmid, $vmiddst, $jobs, $completion, $qga);
}
7258
# Poll the block jobs listed in $jobs (hash ref keyed by device/job ID) once
# per second until they are ready, then finish them according to $completion:
#   'complete': wait until all jobs are ready, block-job-complete them (default)
#   'cancel':   wait until all jobs are ready, block-job-cancel them
#   'skip':     wait until all jobs are ready, return with block jobs in ready state
#   'auto':     wait until all jobs disappear, only use for jobs which complete automatically
#
# $op is the block job type to look at (default "mirror"). If $vmiddst is set
# and differs from $vmid, the jobs are cancelled (not completed) while the
# guest is frozen via the agent ($qga) or suspended.
# On any error all remaining jobs are cancelled and the error is re-thrown.
sub qemu_drive_mirror_monitor {
    my ($vmid, $vmiddst, $jobs, $completion, $qga, $op) = @_;

    $completion //= 'complete';
    $op //= "mirror";

    eval {
        my $err_complete = 0;
        my $starttime = time();

        while (1) {
            die "block job ('$op') timed out\n" if $err_complete > 300;

            my $stats = mon_cmd($vmid, "query-block-jobs");
            my $ctime = time();

            # index the currently running jobs of the requested type by device
            my $running_jobs = {
                map { ($_->{device} => $_) } grep { $_->{type} eq $op } @$stats
            };

            my $readycounter = 0;

            for my $job_id (sort keys %$jobs) {
                my $job = $running_jobs->{$job_id};
                my $vanished = !defined($job);

                # a vanished job is fine if we completed it, or if it finishes on its own
                my $finished = $vanished
                    && (defined($jobs->{$job_id}->{complete}) || $completion eq 'auto');
                if ($finished) {
                    print "$job_id: $op-job finished\n";
                    delete $jobs->{$job_id};
                    next;
                }

                die "$job_id: '$op' has been cancelled\n" if $vanished;

                my ($busy, $ready) = ($job->{busy}, $job->{ready});
                if (my $total = $job->{len}) {
                    my $transferred = $job->{offset} || 0;
                    my $percent = sprintf "%.2f", ($transferred * 100 / $total);

                    my $duration = $ctime - $starttime;
                    my $total_h = render_bytes($total, 1);
                    my $transferred_h = render_bytes($transferred, 1);

                    my $status = sprintf(
                        "transferred $transferred_h of $total_h ($percent%%) in %s",
                        render_duration($duration),
                    );

                    if ($ready) {
                        # busy while ready shouldn't even happen? but mirror is weird
                        $status .= $busy ? ", still busy" : ", ready";
                    }
                    # stop repeating the progress line once the job was seen ready
                    print "$job_id: $status\n" if !$jobs->{$job_id}->{ready};
                    $jobs->{$job_id}->{ready} = $ready;
                }

                $readycounter++ if $job->{ready};
            }

            last if scalar(keys %$jobs) == 0;

            if ($readycounter == scalar(keys %$jobs)) {
                print "all '$op' jobs are ready\n";

                # do the complete later (or has already been done)
                last if $completion eq 'skip' || $completion eq 'auto';

                if ($vmiddst && $vmiddst != $vmid) {
                    my $agent_running = $qga && qga_check_running($vmid);
                    if ($agent_running) {
                        print "freeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-freeze"); };
                    } else {
                        print "suspend vm\n";
                        eval { PVE::QemuServer::vm_suspend($vmid, 1); };
                    }

                    # if we clone a disk for a new target vm, we don't switch the disk
                    PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs);

                    if ($agent_running) {
                        print "unfreeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-thaw"); };
                    } else {
                        print "resume vm\n";
                        eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
                    }

                    last;
                } else {
                    for my $job_id (sort keys %$jobs) {
                        # try to switch the disk if source and destination are on the same guest
                        print "$job_id: Completing block job_id...\n";

                        my $qmp_cmd;
                        if ($completion eq 'complete') {
                            $qmp_cmd = 'block-job-complete';
                        } elsif ($completion eq 'cancel') {
                            $qmp_cmd = 'block-job-cancel';
                        } else {
                            die "invalid completion value: $completion\n";
                        }

                        eval { mon_cmd($vmid, $qmp_cmd, device => $job_id) };
                        if ($@ =~ m/cannot be completed/) {
                            # counted towards the timeout check at the top of the loop
                            print "$job_id: block job cannot be completed, trying again.\n";
                            $err_complete++;
                        } else {
                            print "$job_id: Completed successfully.\n";
                            $jobs->{$job_id}->{complete} = 1;
                        }
                    }
                }
            }
            sleep 1;
        }
    };

    if (my $err = $@) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        die "block job ($op) error: $err";
    }
}
7396
# Send 'block-job-cancel' for every job in $jobs (hash ref keyed by device)
# and then poll once per second until none of them is reported by
# 'query-block-jobs' anymore. Finished entries are removed from $jobs.
sub qemu_blockjobs_cancel {
    my ($vmid, $jobs) = @_;

    for my $job (keys %$jobs) {
        print "$job: Cancelling block job\n";
        # errors of the cancel request itself are ignored
        eval { mon_cmd($vmid, "block-job-cancel", device => $job); };
        $jobs->{$job}->{cancel} = 1;
    }

    while (1) {
        my $stats = mon_cmd($vmid, "query-block-jobs");

        my $running_jobs = { map { ($_->{device} => $_) } @$stats };

        for my $job (keys %$jobs) {
            next if !defined($jobs->{$job}->{cancel});
            next if defined($running_jobs->{$job});

            print "$job: Done.\n";
            delete $jobs->{$job};
        }

        last if scalar(keys %$jobs) == 0;

        sleep 1;
    }
}
7427
# Clone a single drive of VM $vmid for VM $newvmid.
#
# Without $full a linked clone is created. With $full a new volume is
# allocated (on $storage if given, otherwise on the source storage) and the
# data is copied: offline via qemu-img, or via drive-mirror when the VM is
# $running and no $snapname is given. Cloud-init drives only get a fresh,
# empty volume. Every new volume ID is pushed onto @$newvollist.
#
# Returns the (modified) $drive hash pointing at the new volume.
sub clone_disk {
    my ($storecfg, $vmid, $running, $drivename, $drive, $snapname,
        $newvmid, $storage, $format, $full, $newvollist, $jobs, $completion, $qga, $bwlimit, $conf) = @_;

    my $newvolid;

    if ($full) {
        my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        $storeid = $storage if $storage;

        my $dst_format = resolve_dst_disk_format($storecfg, $storeid, $volname, $format);

        print "create full clone of drive $drivename ($drive->{file})\n";
        my $name = undef;
        my $size = undef;
        if (drive_is_cloudinit($drive)) {
            $name = "vm-$newvmid-cloudinit";
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $name .= ".$dst_format" if $scfg->{path};
            $snapname = undef;
            $size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE;
        } elsif ($drivename eq 'efidisk0') {
            $size = get_efivars_size($conf);
        } elsif ($drivename eq 'tpmstate0') {
            $size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
        } else {
            ($size) = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
        }
        $newvolid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $newvmid, $dst_format, $name, ($size/1024)
        );
        push @$newvollist, $newvolid;

        PVE::Storage::activate_volumes($storecfg, [$newvolid]);

        if (drive_is_cloudinit($drive)) {
            # no data is copied for cloud-init drives.
            # when cloning multiple disks (e.g. during clone_vm) it might be the last disk
            # if this is the case, we have to complete any block-jobs still there from
            # previous drive-mirrors
            if (($completion eq 'complete') && (scalar(keys %$jobs) > 0)) {
                qemu_drive_mirror_monitor($vmid, $newvmid, $jobs, $completion, $qga);
            }
        } else {
            my $sparseinit = PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $newvolid);

            if ($running && !$snapname) {
                # live copy via drive-mirror
                die "cannot move TPM state while VM is running\n" if $drivename eq 'tpmstate0';

                my $kvmver = get_running_qemu_version($vmid);
                if (!min_version($kvmver, 2, 7)) {
                    die "drive-mirror with iothread requires qemu version 2.7 or higher\n"
                        if $drive->{iothread};
                }

                qemu_drive_mirror($vmid, $drivename, $newvolid, $newvmid, $sparseinit, $jobs,
                    $completion, $qga, $bwlimit);
            } elsif ($drivename eq 'efidisk0') {
                # TODO: handle bwlimits
                # the relevant data on the efidisk may be smaller than the source
                # e.g. on RBD/ZFS, so we use dd to copy only the amount
                # that is given by the OVMF_VARS.fd
                my $src_path = PVE::Storage::path($storecfg, $drive->{file});
                my $dst_path = PVE::Storage::path($storecfg, $newvolid);

                # better for Ceph if block size is not too small, see bug #3324
                my $bs = 1024*1024;

                run_command(['qemu-img', 'dd', '-n', '-O', $dst_format, "bs=$bs", "osize=$size",
                    "if=$src_path", "of=$dst_path"]);
            } else {
                # TODO: handle bwlimits
                qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit);
            }
        }
    } else {
        print "create linked clone of drive $drivename ($drive->{file})\n";
        $newvolid = PVE::Storage::vdisk_clone($storecfg, $drive->{file}, $newvmid, $snapname);
        push @$newvollist, $newvolid;
    }

    # best effort: the size stays untouched if it cannot be queried
    my ($size) = eval { PVE::Storage::volume_size_info($storecfg, $newvolid, 10) };

    # note: this is the caller's hash, not a copy
    my $disk = $drive;
    $disk->{format} = undef;
    $disk->{file} = $newvolid;
    $disk->{size} = $size if defined($size);

    return $disk;
}
7523
# Ask the running QEMU process of VM $vmid for its version and return it as
# a "major.minor" string.
sub get_running_qemu_version {
    my ($vmid) = @_;

    my $version = mon_cmd($vmid, "query-version")->{qemu};
    return join('.', $version->{major}, $version->{minor});
}
7529
# Decide whether the old (non-EFI) PXE bios files have to be used for
# $machine_type.
#
# Returns an empty list if no machine type is given, otherwise the pair
# ($use_old_bios_files, $machine_type), where a trailing '.pxe' has been
# stripped from the machine type.
sub qemu_use_old_bios_files {
    my ($machine_type) = @_;

    return if !$machine_type;

    # an explicit '.pxe' suffix always selects the old files
    if ($machine_type =~ m/^(\S+)\.pxe$/) {
        my $base_type = $1;
        return (1, $base_type);
    }

    my $version = extract_version($machine_type, kvm_user_version());
    # Note: kvm version < 2.4 use non-efi pxe files, and have problems when we
    # load new efi bios files on migration. So this hack is required to allow
    # live migration from qemu-2.2 to qemu-2.4, which is sometimes used when
    # upgrading from proxmox-ve-3.X to proxmox-ve 4.0
    my $use_old_bios_files = !min_version($version, 2, 4);

    return ($use_old_bios_files, $machine_type);
}
7551
# Return the size in bytes of the OVMF vars image that applies to $conf
# (looked up via the VM architecture and its efidisk0 entry, if any).
# Dies if that image file does not exist.
sub get_efivars_size {
    my ($conf) = @_;

    my $arch = get_vm_arch($conf);

    my $efidisk;
    $efidisk = parse_drive('efidisk0', $conf->{efidisk0}) if $conf->{efidisk0};

    my (undef, $ovmf_vars) = get_ovmf_files($arch, $efidisk);
    die "uefi vars image '$ovmf_vars' not found\n" if ! -f $ovmf_vars;

    return -s $ovmf_vars;
}
7560
# Rewrite the 'size' property of $conf->{efidisk0} with the size reported by
# get_efivars_size(). Does nothing if the config has no efidisk0.
sub update_efidisk_size {
    my ($conf) = @_;

    if (defined($conf->{efidisk0})) {
        my $disk = PVE::QemuServer::parse_drive('efidisk0', $conf->{efidisk0});
        $disk->{size} = get_efivars_size($conf);
        $conf->{efidisk0} = print_drive($disk);
    }

    return;
}
7572
# Rewrite the 'size' property of $conf->{tpmstate0} with the fixed TPM state
# disk size. Does nothing if the config has no tpmstate0 entry, mirroring
# update_efidisk_size(); without the guard an undefined entry would be parsed.
#
# Returns the new tpmstate0 property string, or nothing if there was no entry.
sub update_tpmstate_size {
    my ($conf) = @_;

    return if !defined($conf->{tpmstate0});

    my $disk = PVE::QemuServer::parse_drive('tpmstate0', $conf->{tpmstate0});
    $disk->{size} = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;

    return $conf->{tpmstate0} = print_drive($disk);
}
7580
# Allocate a new EFI vars volume for VM $vmid on storage $storeid in format
# $fmt and fill it from the OVMF vars image selected by $arch and $efidisk.
#
# Returns ($volid, $size/1024), with $size as reported by volume_size_info().
# Dies if the default vars image is missing.
sub create_efidisk($$$$$$) {
    my ($storecfg, $storeid, $vmid, $fmt, $arch, $efidisk) = @_;

    my (undef, $ovmf_vars) = get_ovmf_files($arch, $efidisk);
    die "EFI vars default image not found\n" if ! -f $ovmf_vars;

    # the allocation wants kb, the copy wants bytes
    my $vars_size_b = -s $ovmf_vars;
    my $vars_size = PVE::Tools::convert_size($vars_size_b, 'b' => 'kb');

    my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $fmt, undef, $vars_size);
    PVE::Storage::activate_volumes($storecfg, [$volid]);
    qemu_img_convert($ovmf_vars, $volid, $vars_size_b, undef, 0);

    my ($size) = PVE::Storage::volume_size_info($storecfg, $volid, 3);

    return ($volid, $size/1024);
}
7597
# Query the iothreads of VM $vmid and return a hash ref mapping each
# iothread 'id' to its 'thread-id'.
sub vm_iothreads_list {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, 'query-iothreads');

    my %thread_id_of = map { ($_->{id} => $_->{"thread-id"}) } @$res;

    return \%thread_id_of;
}
7610
# Work out on which SCSI controller a drive lives.
#
# Returns ($maxdev, $controller, $controller_prefix): the number of devices
# per controller for the configured 'scsihw', the controller index derived
# from the drive index, and the controller name prefix.
sub scsihw_infos {
    my ($conf, $drive) = @_;

    my $scsihw = $conf->{scsihw};
    my $is_single = $scsihw && $scsihw eq 'virtio-scsi-single';

    my $maxdev;
    if (!$scsihw || $scsihw =~ m/^lsi/) {
        $maxdev = 7;
    } elsif ($is_single) {
        $maxdev = 1;
    } else {
        $maxdev = 256;
    }

    my $controller = int($drive->{index} / $maxdev);
    my $controller_prefix = $is_single ? "virtioscsi" : "scsihw";

    return ($maxdev, $controller, $controller_prefix);
}
7631
# Map an 'ostype' value to a Windows major version number.
# Returns 0 for an empty/undefined ostype and for anything not recognised.
sub windows_version {
    my ($ostype) = @_;

    return 0 if !$ostype;

    # named legacy releases
    my %legacy_version = (
        wxp => 5,
        w2k3 => 5,
        w2k => 5,
        w2k8 => 6,
        wvista => 6,
    );
    return $legacy_version{$ostype} if exists $legacy_version{$ostype};

    # 'winN' carries the version number in its name
    if ($ostype =~ m/^win(\d+)$/) {
        my $winversion = $1;
        return $winversion;
    }

    return 0;
}
7649
# Pick the image format for a new volume on storage $storeid.
#
# An explicitly requested $format wins if the storage supports it. Without a
# request the format of $src_volname is used as hint; without a source volume
# name the storage default is returned. Unsupported formats fall back to the
# storage default.
sub resolve_dst_disk_format {
    my ($storecfg, $storeid, $src_volname, $format) = @_;

    my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);

    if (!$format) {
        return $defFormat if !$src_volname;

        # if no target format is specified, use the source disk format as hint
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        $format = qemu_img_format($scfg, $src_volname);
    }

    # test if requested format is supported - else use default
    my $supported = grep { $_ eq $format } @$validFormats;

    return $supported ? $format : $defFormat;
}
7669
# NOTE: if this logic changes, please update docs & possibly gui logic
#
# Choose the storage for VM state files, in this order:
#   1. 'vmstatestorage' from the config, if set
#   2. a shared storage on which the VM has at least one disk
#   3. a non-shared storage on which the VM has at least one disk
#   4. the storage named 'local'
# Within 2. and 3. storages with a 'path' setting are preferred.
sub find_vmstate_storage {
    my ($conf, $storecfg) = @_;

    return $conf->{vmstatestorage} if $conf->{vmstatestorage};

    my ($shared, $local);

    foreach_storage_used_by_vm($conf, sub {
        my ($sid) = @_;
        my $scfg = PVE::Storage::storage_config($storecfg, $sid);

        # prefer file based storage: a 'path' storage overrides an earlier pick
        if ($scfg->{shared}) {
            $shared = $sid if !$shared || $scfg->{path};
        } else {
            $local = $sid if !$local || $scfg->{path};
        }
    });

    return $shared // $local // 'local';
}
7693
# Generate a new UUID with the UUID module and return its string form.
sub generate_uuid {
    my $binary;
    UUID::generate($binary);

    my $text;
    UUID::unparse($binary, $text);

    return $text;
}
7700
# Return a freshly generated UUID as a 'uuid=...' property string.
sub generate_smbios1_uuid {
    my $uuid = generate_uuid();
    return "uuid=" . $uuid;
}
7704
# Send the QMP 'nbd-server-stop' command to VM $vmid.
sub nbd_stop {
    my ($vmid) = @_;

    return mon_cmd($vmid, 'nbd-server-stop');
}
7710
# Create the (empty) reboot trigger file for VM $vmid below /run/qemu-server.
# Dies if the file cannot be created.
sub create_reboot_request {
    my ($vmid) = @_;

    my $fh;
    if (!open($fh, '>', "/run/qemu-server/$vmid.reboot")) {
        die "failed to create reboot trigger file: $!\n";
    }
    close($fh);
}
7717
# Remove the reboot trigger file of VM $vmid.
# Returns the result of unlink() (true if a file was removed). A missing file
# is not an error; any other failure is fatal.
sub clear_reboot_request {
    my ($vmid) = @_;

    my $path = "/run/qemu-server/$vmid.reboot";
    my $res = unlink($path);

    if (!$res && $! != POSIX::ENOENT) {
        die "could not remove reboot request for $vmid: $!";
    }

    return $res;
}
7729
# Build a boot order from the legacy single-letter boot string.
#
# Each letter of the string ($bootcfg->{legacy}, or the schema default) gets a
# base index of 100, 200, ... by position. 'd' is applied to CD-ROM drives,
# 'c' to the configured 'bootdisk', 'n' to the configured net devices.
#
# Returns a hash ref mapping device names to their boot index.
sub bootorder_from_legacy {
    my ($conf, $bootcfg) = @_;

    my $boot = $bootcfg->{legacy} || $boot_fmt->{legacy}->{default};

    my $bootindex_hash = {};
    my $position = 0;
    for my $letter (split(//, $boot)) {
        $position++;
        $bootindex_hash->{$letter} = $position * 100;
    }

    my $bootorder = {};

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        if (drive_is_cdrom($drive, 1)) {
            return if !$bootindex_hash->{d};
            $bootorder->{$ds} = $bootindex_hash->{d};
            $bootindex_hash->{d} += 1;
        } elsif ($bootindex_hash->{c}) {
            # only the bootdisk gets an entry, but every disk advances the index
            $bootorder->{$ds} = $bootindex_hash->{c}
                if $conf->{bootdisk} && $conf->{bootdisk} eq $ds;
            $bootindex_hash->{c} += 1;
        }
    });

    if ($bootindex_hash->{n}) {
        for my $i (0 .. $MAX_NETS - 1) {
            my $netname = "net$i";
            next if !$conf->{$netname};
            $bootorder->{$netname} = $bootindex_hash->{n};
            $bootindex_hash->{n} += 1;
        }
    }

    return $bootorder;
}
7769
# Generate default device list for 'boot: order=' property. Matches legacy
# default boot order, but with explicit device names. This is important, since
# the fallback for when neither 'order' nor the old format is specified relies
# on 'bootorder_from_legacy' above, and it would be confusing if this diverges.
#
# Returns an array ref with, in this order and if present: the first hard
# disk, the first CD-ROM and the first configured net device.
sub get_default_bootdevices {
    my ($conf) = @_;

    my @ret = ();

    # harddisk (0) first, then cdrom (1)
    for my $cdrom (0, 1) {
        my $first = PVE::QemuServer::Drive::resolve_first_disk($conf, $cdrom);
        push @ret, $first if $first;
    }

    # network: only the lowest configured netN
    for my $i (0 .. $MAX_NETS - 1) {
        my $netname = "net$i";
        if ($conf->{$netname}) {
            push @ret, $netname;
            last;
        }
    }

    return \@ret;
}
7797
# Return a hash ref mapping device names to boot indices for $conf.
#
# Without a 'boot' property, or with a legacy-style one, the result comes from
# bootorder_from_legacy(). With 'order=' the listed devices are numbered
# consecutively starting at 100.
sub device_bootorder {
    my ($conf) = @_;

    return bootorder_from_legacy($conf) if !defined($conf->{boot});

    my $boot = parse_property_string($boot_fmt, $conf->{boot});

    if (!defined($boot) || $boot->{legacy}) {
        my $legacy_order = bootorder_from_legacy($conf, $boot);
        return $legacy_order;
    }

    my $bootorder = {};
    if ($boot->{order}) {
        my $i = 100; # start at 100 to allow user to insert devices before us with -args
        $bootorder->{$_} = $i++ for PVE::Tools::split_list($boot->{order});
    }

    return $bootorder;
}
7817
# Connect to the qmeventd socket and announce VM $vmid with a 'vzdump'
# handshake message.
#
# The connect is retried (25ms apart) while it fails with EINTR/EAGAIN and
# given up after the fifth attempt; any other error is fatal immediately.
# Returns the socket handle, which the caller has to close later on.
sub register_qmeventd_handle {
    my ($vmid) = @_;

    my $peer = "/var/run/qmeventd.sock";
    my $count = 0;
    my $fh;

    while (1) {
        $count++;
        $fh = IO::Socket::UNIX->new(Peer => $peer, Blocking => 0, Timeout => 1);
        last if $fh;

        die "unable to connect to qmeventd socket (vmid: $vmid) - $!\n"
            if $! != EINTR && $! != EAGAIN;

        die "unable to connect to qmeventd socket (vmid: $vmid) - timeout "
            . "after $count retries\n"
            if $count > 4;

        usleep(25000);
    }

    # send handshake to mark VM as backing up
    print $fh to_json({vzdump => {vmid => "$vmid"}});

    # return handle to be closed later when inhibit is no longer required
    return $fh;
}
7845
7846 # bash completion helper
7847
# Completion helper: list the volume IDs of backup archives whose format is
# 'vma.<compressor>'. If the current value $cvalue already starts with
# 'STORAGE:', only that storage is queried.
sub complete_backup_archives {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    # stays undef unless the value has a 'storage:' prefix
    my ($storeid) = $cvalue =~ m/^([^:]+):/;

    my $data = PVE::Storage::template_list($cfg, $storeid, 'backup');

    my $res = [];
    for my $id (keys %$data) {
        for my $item (@{$data->{$id}}) {
            next if $item->{format} !~ m/^vma\.(${\PVE::Storage::Plugin::COMPRESSOR_RE})$/;
            next if !defined($item->{volid});
            push @$res, $item->{volid};
        }
    }

    return $res;
}
7871
# Completion helper returning an array ref of VM IDs from vmstatus().
#   $running undefined: all VMs
#   $running true:      non-template VMs whose status is 'running'
#   $running false:     non-template VMs whose status is not 'running'
my $complete_vmid_full = sub {
    my ($running) = @_;

    my $idlist = vmstatus();

    return [ keys %$idlist ] if !defined($running);

    my @matching = grep {
        my $d = $idlist->{$_};
        my $is_running = $d->{status} eq 'running';
        !$d->{template} && ($running ? $is_running : !$is_running);
    } keys %$idlist;

    return \@matching;
};
7891
# Completion helper: all VM IDs, regardless of state.
sub complete_vmid {
    return $complete_vmid_full->();
}
7895
# Completion helper: IDs of non-template VMs that are not running.
sub complete_vmid_stopped {
    return $complete_vmid_full->(0);
}
7899
# Completion helper: IDs of non-template VMs that are running.
sub complete_vmid_running {
    return $complete_vmid_full->(1);
}
7903
# Completion helper: IDs of all storages that pass storage_check_enabled()
# (no node given, errors suppressed) and have the 'images' content type.
sub complete_storage {
    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @usable = grep {
        PVE::Storage::storage_check_enabled($cfg, $_, undef, 1)
            && $ids->{$_}->{content}->{images}
    } keys %$ids;

    return \@usable;
}
7918
# Completion helper: IDs of all storages that pass storage_check_enabled() for
# the target node and have the 'images' content type.
#
# The target node is taken from the second element of $all_args.
sub complete_migration_storage {
    my ($cmd, $param, $current_value, $all_args) = @_;

    # plain element access instead of the one-element slice '@$all_args[1]'
    my $targetnode = $all_args->[1];

    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my $res = [];
    foreach my $sid (keys %$ids) {
        next if !PVE::Storage::storage_check_enabled($cfg, $sid, $targetnode, 1);
        next if !$ids->{$sid}->{content}->{images};
        push @$res, $sid;
    }

    return $res;
}
7936
# Return true if VM $vmid reports the QMP status "paused".
# Errors (missing config on this node, failing QMP query) are only warned
# about and lead to a false result.
sub vm_is_paused {
    my ($vmid) = @_;

    my $qmpstatus = eval {
        PVE::QemuConfig::assert_config_exists_on_node($vmid);
        mon_cmd($vmid, "query-status");
    };
    if ($@) {
        warn "$@\n";
    }

    return $qmpstatus && $qmpstatus->{status} eq "paused";
}
7946
# Check that the storage holding volume $vol is configured for the content
# type of that volume. Returns 1 on success, dies otherwise.
sub check_volume_storage_type {
    my ($storecfg, $vol) = @_;

    my ($storeid) = PVE::Storage::parse_volume_id($vol);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
    my ($vtype) = PVE::Storage::parse_volname($storecfg, $vol);

    if (!$scfg->{content}->{$vtype}) {
        die "storage '$storeid' does not support content-type '$vtype'\n";
    }

    return 1;
}
7959
7960 1;