]> git.proxmox.com Git - qemu-server.git/blob - PVE/QemuServer.pm
drives: ro: code reduction/refactor
[qemu-server.git] / PVE / QemuServer.pm
1 package PVE::QemuServer;
2
3 use strict;
4 use warnings;
5
6 use Cwd 'abs_path';
7 use Digest::SHA;
8 use Fcntl ':flock';
9 use Fcntl;
10 use File::Basename;
11 use File::Copy qw(copy);
12 use File::Path;
13 use File::stat;
14 use Getopt::Long;
15 use IO::Dir;
16 use IO::File;
17 use IO::Handle;
18 use IO::Select;
19 use IO::Socket::UNIX;
20 use IPC::Open3;
21 use JSON;
22 use MIME::Base64;
23 use POSIX;
24 use Storable qw(dclone);
25 use Time::HiRes qw(gettimeofday usleep);
26 use URI::Escape;
27 use UUID;
28
29 use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file);
30 use PVE::CGroup;
31 use PVE::DataCenterConfig;
32 use PVE::Exception qw(raise raise_param_exc);
33 use PVE::Format qw(render_duration render_bytes);
34 use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
35 use PVE::INotify;
36 use PVE::JSONSchema qw(get_standard_option parse_property_string);
37 use PVE::ProcFSTools;
38 use PVE::PBSClient;
39 use PVE::RPCEnvironment;
40 use PVE::Storage;
41 use PVE::SysFSTools;
42 use PVE::Systemd;
43 use PVE::Tools qw(run_command file_read_firstline file_get_contents dir_glob_foreach get_host_arch $IPV6RE);
44
45 use PVE::QMPClient;
46 use PVE::QemuConfig;
47 use PVE::QemuServer::Helpers qw(min_version config_aware_timeout);
48 use PVE::QemuServer::Cloudinit;
49 use PVE::QemuServer::CGroup;
50 use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options);
51 use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom drive_is_read_only parse_drive print_drive);
52 use PVE::QemuServer::Machine;
53 use PVE::QemuServer::Memory;
54 use PVE::QemuServer::Monitor qw(mon_cmd);
55 use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
56 use PVE::QemuServer::USB qw(parse_usb_device);
57
# Optional SDN support: $have_sdn is 1 when PVE::Network::SDN::Zones could be
# loaded and stays undef otherwise (the load error is deliberately ignored).
my $have_sdn = eval {
    require PVE::Network::SDN::Zones;
    1;
};
63
# Directory holding the EDK2 firmware images.
my $EDK2_FW_BASE = '/usr/share/pve-edk2-firmware/';

# Firmware file pairs, indexed by architecture and then by variant name.
# Every entry is a two-element list: the *_CODE image first, the *_VARS image second.
my $OVMF = {
    x86_64 => {
        '4m-no-smm' => [
            map { $EDK2_FW_BASE . "/$_" } qw(OVMF_CODE_4M.fd OVMF_VARS_4M.fd)
        ],
        '4m-no-smm-ms' => [
            map { $EDK2_FW_BASE . "/$_" } qw(OVMF_CODE_4M.fd OVMF_VARS_4M.ms.fd)
        ],
        '4m' => [
            map { $EDK2_FW_BASE . "/$_" } qw(OVMF_CODE_4M.secboot.fd OVMF_VARS_4M.fd)
        ],
        '4m-ms' => [
            map { $EDK2_FW_BASE . "/$_" } qw(OVMF_CODE_4M.secboot.fd OVMF_VARS_4M.ms.fd)
        ],
        default => [
            map { $EDK2_FW_BASE . "/$_" } qw(OVMF_CODE.fd OVMF_VARS.fd)
        ],
    },
    aarch64 => {
        default => [
            map { $EDK2_FW_BASE . "/$_" } qw(AAVMF_CODE.fd AAVMF_VARS.fd)
        ],
    },
};
95
# CPU information of the host, read once when the module is loaded.
my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();

# Note about locking: an flock on the config file protects against concurrent actions.
# Additionally, there is a 'lock' setting in the config file. It can be set to 'migrate',
# 'backup', 'snapshot' or 'rollback'. Most actions are not allowed while such a lock is set,
# but this kind of lock can be ignored with the --skiplock flag.

# Register parser and writer callbacks for the files below '/qemu-server/'.
cfs_register_file(
    '/qemu-server/',
    \&parse_vm_config,
    \&write_vm_config,
);

PVE::JSONSchema::register_standard_option(
    'pve-qm-stateuri',
    {
        type => 'string',
        description => "Some command save/restore state from this location.",
        maxLength => 128,
        optional => 1,
    },
);

# Accepted machine type strings: 'pc', 'q35' and 'virt', each optionally carrying a
# version, a '+pveN' suffix and (for pc/q35 versions) a '.pxe' suffix.
PVE::JSONSchema::register_standard_option(
    'pve-qemu-machine',
    {
        type => 'string',
        description => "Specifies the Qemu machine type.",
        pattern => '(pc|pc(-i440fx)?-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|q35|pc-q35-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|virt(?:-\d+(\.\d+)+)?(\+pve\d+)?)',
        maxLength => 40,
        optional => 1,
    },
);
121
122
# Resolve the target storage for $source using the storage map $map.
#
# Lookup order:
#   1. $source itself when no map is given at all,
#   2. the explicit entry for $source in $map->{entries},
#   3. $map->{default} when it is set to a true value,
#   4. $source itself (identity fallback).
sub map_storage {
    my ($map, $source) = @_;

    return $source if !defined($map);

    my $entries = $map->{entries};
    if ($entries && defined($entries->{$source})) {
        return $entries->{$source};
    }

    return $map->{default} || $source;
}
136
# Standard option describing a source-to-target storage mapping (a 'storagepair-list').
PVE::JSONSchema::register_standard_option(
    'pve-targetstorage',
    {
        type => 'string',
        format => 'storagepair-list',
        description => "Mapping from source to target storages. Providing only a single storage ID maps all source storages to that storage. Providing the special value '1' will map each source storage to itself.",
        optional => 1,
    },
);
143
144 #no warnings 'redefine';
145
# Cached result of PVE::INotify::nodename(); filled on the first nodename() call.
my $nodename_cache;

# Return the node name as reported by PVE::INotify::nodename().
# The lookup is only repeated while the cached value is still undefined.
sub nodename {
    if (!defined($nodename_cache)) {
        $nodename_cache = PVE::INotify::nodename();
    }
    return $nodename_cache;
}
151
# Property-string format of the 'watchdog' option; 'model' is the default key.
my $watchdog_fmt = {
    model => {
        type => 'string',
        enum => ['i6300esb', 'ib700'],
        default => 'i6300esb',
        default_key => 1,
        optional => 1,
        description => "Watchdog type to emulate.",
    },
    action => {
        type => 'string',
        enum => ['reset', 'shutdown', 'poweroff', 'pause', 'debug', 'none'],
        optional => 1,
        description => "The action to perform if after activation the guest fails to poll the watchdog in time.",
    },
};
PVE::JSONSchema::register_format('pve-qm-watchdog', $watchdog_fmt);
169
# Property-string format of the 'agent' option; 'enabled' is the default key.
my $agent_fmt = {
    enabled => {
        type => 'boolean',
        default_key => 1,
        default => 0,
        description => "Enable/disable communication with a Qemu Guest Agent (QGA) running in the VM.",
    },
    fstrim_cloned_disks => {
        type => 'boolean',
        optional => 1,
        default => 0,
        description => "Run fstrim after moving a disk or migrating the VM.",
    },
    type => {
        type => 'string',
        enum => ['virtio', 'isa'],
        optional => 1,
        default => 'virtio',
        description => "Select the agent type",
    },
};
191
# Property-string format of the 'vga' option; 'type' is the default key.
my $vga_fmt = {
    type => {
        type => 'string',
        enum => [
            'cirrus', 'qxl', 'qxl2', 'qxl3', 'qxl4', 'none',
            'serial0', 'serial1', 'serial2', 'serial3',
            'std', 'virtio', 'vmware',
        ],
        default_key => 1,
        default => 'std',
        optional => 1,
        description => "Select the VGA type.",
    },
    memory => {
        type => 'integer',
        minimum => 4,
        maximum => 512,
        optional => 1,
        description => "Sets the VGA memory (in MiB). Has no effect with serial display.",
    },
};
209
# Property-string format of the 'ivshmem' option (inter-VM shared memory).
my $ivshmem_fmt = {
    size => {
        type => 'integer',
        minimum => 1,
        description => "The size of the file in MB.",
    },
    name => {
        type => 'string',
        pattern => '[a-zA-Z0-9\-]+',
        format_description => 'string',
        optional => 1,
        description => "The name of the file. Will be prefixed with 'pve-shm-'. Default is the VMID. Will be deleted when the VM is stopped.",
    },
};
224
# Property-string format of the 'audio0' option.
my $audio_fmt = {
    device => {
        type => 'string',
        enum => ['ich9-intel-hda', 'intel-hda', 'AC97'],
        description => "Configure an audio device.",
    },
    driver => {
        type => 'string',
        enum => [qw(spice none)],
        optional => 1,
        default => 'spice',
        description => "Driver backend for the audio device.",
    },
};
239
# Property-string format of the 'spice_enhancements' option.
my $spice_enhancements_fmt = {
    foldersharing => {
        type => 'boolean',
        default => '0',
        optional => 1,
        description => "Enable folder sharing via SPICE. Needs Spice-WebDAV daemon installed in the VM.",
    },
    videostreaming => {
        type => 'string',
        enum => [qw(off all filter)],
        default => 'off',
        optional => 1,
        description => "Enable video streaming. Uses compression for detected video streams.",
    },
};
255
# Property-string format of the 'rng0' option; 'source' is the default key.
my $rng_fmt = {
    source => {
        type => 'string',
        enum => [qw(/dev/urandom /dev/random /dev/hwrng)],
        default_key => 1,
        description =>
            "The file on the host to gather entropy from. In most cases '/dev/urandom'"
            . " should be preferred over '/dev/random' to avoid entropy-starvation issues on the"
            . " host. Using urandom does *not* decrease security in any meaningful way, as it's"
            . " still seeded from real entropy, and the bytes provided will most likely be mixed"
            . " with real entropy on the guest as well. '/dev/hwrng' can be used to pass through"
            . " a hardware RNG from the host.",
    },
    max_bytes => {
        type => 'integer',
        optional => 1,
        # The default of 1024 bytes per (default) 1000 ms period is 1 KiB/s. That provides
        # enough entropy to the guest to avoid boot-starvation issues (e.g. systemd etc...)
        # while allowing no chance of overwhelming the host, provided we're reading from
        # /dev/urandom.
        default => 1024,
        description =>
            "Maximum bytes of entropy allowed to get injected into the guest every"
            . " 'period' milliseconds. Prefer a lower value when using '/dev/random' as source. Use"
            . " `0` to disable limiting (potentially dangerous!).",
    },
    period => {
        type => 'integer',
        optional => 1,
        default => 1000,
        description =>
            "Every 'period' milliseconds the entropy-injection quota is reset, allowing"
            . " the guest to retrieve another 'max_bytes' of entropy.",
    },
};
288
# Schema of the regular VM configuration options: option name => JSON schema property.
# Further keys (numaN, netN and the cloud-init options) are added to this hash later in
# the file.
#
# Descriptions built from several concatenated string literals must carry the separating
# space at the start of the continuation literal, otherwise words run together in the
# generated documentation.
my $confdesc = {
    onboot => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether a VM will be started during system bootup.",
        default => 0,
    },
    autostart => {
        optional => 1,
        type => 'boolean',
        description => "Automatic restart after crash (currently ignored).",
        default => 0,
    },
    hotplug => {
        optional => 1,
        type => 'string', format => 'pve-hotplug-features',
        description => "Selectively enable hotplug features. This is a comma separated list of"
            ." hotplug features: 'network', 'disk', 'cpu', 'memory' and 'usb'. Use '0' to disable"
            ." hotplug completely. Using '1' as value is an alias for the default `network,disk,usb`.",
        default => 'network,disk,usb',
    },
    reboot => {
        optional => 1,
        type => 'boolean',
        description => "Allow reboot. If set to '0' the VM exit on reboot.",
        default => 1,
    },
    lock => {
        optional => 1,
        type => 'string',
        description => "Lock/unlock the VM.",
        enum => [qw(backup clone create migrate rollback snapshot snapshot-delete suspending suspended)],
    },
    cpulimit => {
        optional => 1,
        type => 'number',
        description => "Limit of CPU usage.",
        verbose_description => "Limit of CPU usage.\n\nNOTE: If the computer has 2 CPUs, it has"
            ." total of '2' CPU time. Value '0' indicates no CPU limit.",
        minimum => 0,
        maximum => 128,
        default => 0,
    },
    cpuunits => {
        optional => 1,
        type => 'integer',
        description => "CPU weight for a VM, will be clamped to [1, 10000] in cgroup v2.",
        verbose_description => "CPU weight for a VM. Argument is used in the kernel fair scheduler."
            ." The larger the number is, the more CPU time this VM gets. Number is relative to"
            ." weights of all the other running VMs.",
        minimum => 2,
        maximum => 262144,
        default => 'cgroup v1: 1024, cgroup v2: 100',
    },
    memory => {
        optional => 1,
        type => 'integer',
        description => "Amount of RAM for the VM in MB. This is the maximum available memory when"
            ." you use the balloon device.",
        minimum => 16,
        default => 512,
    },
    balloon => {
        optional => 1,
        type => 'integer',
        description => "Amount of target RAM for the VM in MB. Using zero disables the balloon driver.",
        minimum => 0,
    },
    shares => {
        optional => 1,
        type => 'integer',
        description => "Amount of memory shares for auto-ballooning. The larger the number is, the"
            ." more memory this VM gets. Number is relative to weights of all other running VMs."
            ." Using zero disables auto-ballooning. Auto-ballooning is done by pvestatd.",
        minimum => 0,
        maximum => 50000,
        default => 1000,
    },
    keyboard => {
        optional => 1,
        type => 'string',
        description => "Keyboard layout for VNC server. The default is read from the"
            ." '/etc/pve/datacenter.cfg' configuration file. It should not be necessary to set it.",
        enum => PVE::Tools::kvmkeymaplist(),
        default => undef,
    },
    name => {
        optional => 1,
        type => 'string', format => 'dns-name',
        description => "Set a name for the VM. Only used on the configuration web interface.",
    },
    scsihw => {
        optional => 1,
        type => 'string',
        description => "SCSI controller model",
        enum => [qw(lsi lsi53c810 virtio-scsi-pci virtio-scsi-single megasas pvscsi)],
        default => 'lsi',
    },
    description => {
        optional => 1,
        type => 'string',
        description => "Description for the VM. Shown in the web-interface VM's summary."
            ." This is saved as comment inside the configuration file.",
        maxLength => 1024 * 8,
    },
    ostype => {
        optional => 1,
        type => 'string',
        enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 win10 win11 l24 l26 solaris)],
        description => "Specify guest operating system.",
        verbose_description => <<EODESC,
Specify guest operating system. This is used to enable special
optimization/features for specific operating systems:

[horizontal]
other;; unspecified OS
wxp;; Microsoft Windows XP
w2k;; Microsoft Windows 2000
w2k3;; Microsoft Windows 2003
w2k8;; Microsoft Windows 2008
wvista;; Microsoft Windows Vista
win7;; Microsoft Windows 7
win8;; Microsoft Windows 8/2012/2012r2
win10;; Microsoft Windows 10/2016/2019
win11;; Microsoft Windows 11/2022
l24;; Linux 2.4 Kernel
l26;; Linux 2.6 - 5.X Kernel
solaris;; Solaris/OpenSolaris/OpenIndiania kernel
EODESC
    },
    boot => {
        optional => 1,
        type => 'string', format => 'pve-qm-boot',
        description => "Specify guest boot order. Use the 'order=' sub-property as usage with no"
            ." key or 'legacy=' is deprecated.",
    },
    bootdisk => {
        optional => 1,
        type => 'string', format => 'pve-qm-bootdisk',
        description => "Enable booting from specified disk. Deprecated: Use 'boot: order=foo;bar' instead.",
        pattern => '(ide|sata|scsi|virtio)\d+',
    },
    smp => {
        optional => 1,
        type => 'integer',
        description => "The number of CPUs. Please use option -sockets instead.",
        minimum => 1,
        default => 1,
    },
    sockets => {
        optional => 1,
        type => 'integer',
        description => "The number of CPU sockets.",
        minimum => 1,
        default => 1,
    },
    cores => {
        optional => 1,
        type => 'integer',
        description => "The number of cores per socket.",
        minimum => 1,
        default => 1,
    },
    numa => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable NUMA.",
        default => 0,
    },
    hugepages => {
        optional => 1,
        type => 'string',
        description => "Enable/disable hugepages memory.",
        enum => [qw(any 2 1024)],
    },
    keephugepages => {
        optional => 1,
        type => 'boolean',
        default => 0,
        description => "Use together with hugepages. If enabled, hugepages will not be deleted"
            ." after VM shutdown and can be used for subsequent starts.",
    },
    vcpus => {
        optional => 1,
        type => 'integer',
        description => "Number of hotplugged vcpus.",
        minimum => 1,
        default => 0,
    },
    acpi => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable ACPI.",
        default => 1,
    },
    agent => {
        optional => 1,
        description => "Enable/disable communication with the Qemu Guest Agent and its properties.",
        type => 'string',
        format => $agent_fmt,
    },
    kvm => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable KVM hardware virtualization.",
        default => 1,
    },
    tdf => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable time drift fix.",
        default => 0,
    },
    localtime => {
        optional => 1,
        type => 'boolean',
        description => "Set the real time clock (RTC) to local time. This is enabled by default if"
            ." the `ostype` indicates a Microsoft Windows OS.",
    },
    freeze => {
        optional => 1,
        type => 'boolean',
        description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
    },
    vga => {
        optional => 1,
        type => 'string', format => $vga_fmt,
        description => "Configure the VGA hardware.",
        verbose_description => "Configure the VGA Hardware. If you want to use high resolution"
            ." modes (>= 1280x1024x16) you may need to increase the vga memory option. Since QEMU"
            ." 2.9 the default VGA display type is 'std' for all OS types besides some Windows"
            ." versions (XP and older) which use 'cirrus'. The 'qxl' option enables the SPICE"
            ." display server. For win* OS you can select how many independent displays you want,"
            ." Linux guests can add displays them self.\nYou can also run without any graphic card,"
            ." using a serial device as terminal.",
    },
    watchdog => {
        optional => 1,
        type => 'string', format => 'pve-qm-watchdog',
        description => "Create a virtual hardware watchdog device.",
        verbose_description => "Create a virtual hardware watchdog device. Once enabled (by a guest"
            ." action), the watchdog must be periodically polled by an agent inside the guest or"
            ." else the watchdog will reset the guest (or execute the respective action specified)",
    },
    startdate => {
        optional => 1,
        type => 'string',
        typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
        description => "Set the initial date of the real time clock. Valid format for date are:"
            ." 'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
        pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
        default => 'now',
    },
    startup => get_standard_option('pve-startup-order'),
    template => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable Template.",
        default => 0,
    },
    args => {
        optional => 1,
        type => 'string',
        description => "Arbitrary arguments passed to kvm.",
        verbose_description => <<EODESCR,
Arbitrary arguments passed to kvm, for example:

args: -no-reboot -no-hpet

NOTE: this option is for experts only.
EODESCR
    },
    tablet => {
        optional => 1,
        type => 'boolean',
        default => 1,
        description => "Enable/disable the USB tablet device.",
        verbose_description => "Enable/disable the USB tablet device. This device is usually needed"
            ." to allow absolute mouse positioning with VNC. Else the mouse runs out of sync with"
            ." normal VNC clients. If you're running lots of console-only guests on one host, you"
            ." may consider disabling this to save some context switches. This is turned off by"
            ." default if you use spice (`qm set <vmid> --vga qxl`).",
    },
    migrate_speed => {
        optional => 1,
        type => 'integer',
        description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
        minimum => 0,
        default => 0,
    },
    migrate_downtime => {
        optional => 1,
        type => 'number',
        description => "Set maximum tolerated downtime (in seconds) for migrations.",
        minimum => 0,
        default => 0.1,
    },
    cdrom => {
        optional => 1,
        type => 'string', format => 'pve-qm-ide',
        typetext => '<volume>',
        description => "This is an alias for option -ide2",
    },
    cpu => {
        optional => 1,
        description => "Emulated CPU type.",
        type => 'string',
        format => 'pve-vm-cpu-conf',
    },
    parent => get_standard_option('pve-snapshot-name', {
        optional => 1,
        description => "Parent snapshot name. This is used internally, and should not be modified.",
    }),
    snaptime => {
        optional => 1,
        description => "Timestamp for snapshots.",
        type => 'integer',
        minimum => 0,
    },
    vmstate => {
        optional => 1,
        type => 'string', format => 'pve-volume-id',
        description => "Reference to a volume which stores the VM state. This is used internally"
            ." for snapshots.",
    },
    vmstatestorage => get_standard_option('pve-storage-id', {
        description => "Default storage for VM state volumes/files.",
        optional => 1,
    }),
    runningmachine => get_standard_option('pve-qemu-machine', {
        description => "Specifies the QEMU machine type of the running vm. This is used internally"
            ." for snapshots.",
    }),
    runningcpu => {
        description => "Specifies the QEMU '-cpu' parameter of the running vm. This is used"
            ." internally for snapshots.",
        optional => 1,
        type => 'string',
        pattern => $PVE::QemuServer::CPUConfig::qemu_cmdline_cpu_re,
        format_description => 'QEMU -cpu parameter'
    },
    machine => get_standard_option('pve-qemu-machine'),
    arch => {
        description => "Virtual processor architecture. Defaults to the host.",
        optional => 1,
        type => 'string',
        enum => [qw(x86_64 aarch64)],
    },
    smbios1 => {
        description => "Specify SMBIOS type 1 fields.",
        type => 'string', format => 'pve-qm-smbios1',
        maxLength => 512,
        optional => 1,
    },
    protection => {
        optional => 1,
        type => 'boolean',
        description => "Sets the protection flag of the VM. This will disable the remove VM and"
            ." remove disk operations.",
        default => 0,
    },
    bios => {
        optional => 1,
        type => 'string',
        enum => [ qw(seabios ovmf) ],
        description => "Select BIOS implementation.",
        default => 'seabios',
    },
    vmgenid => {
        type => 'string',
        pattern => '(?:[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}|[01])',
        format_description => 'UUID',
        description => "Set VM Generation ID. Use '1' to autogenerate on create or update, pass '0'"
            ." to disable explicitly.",
        verbose_description => "The VM generation ID (vmgenid) device exposes a 128-bit integer"
            ." value identifier to the guest OS. This allows to notify the guest operating system"
            ." when the virtual machine is executed with a different configuration (e.g. snapshot"
            ." execution or creation from a template). The guest operating system notices the"
            ." change, and is then able to react as appropriate by marking its copies of"
            ." distributed databases as dirty, re-initializing its random number generator, etc.\n"
            ."Note that auto-creation only works when done through API/CLI create or update methods"
            .", but not when manually editing the config file.",
        default => "1 (autogenerated)",
        optional => 1,
    },
    hookscript => {
        type => 'string',
        format => 'pve-volume-id',
        optional => 1,
        description => "Script that will be executed during various steps in the vms lifetime.",
    },
    ivshmem => {
        type => 'string',
        format => $ivshmem_fmt,
        description => "Inter-VM shared memory. Useful for direct communication between VMs, or to"
            ." the host.",
        optional => 1,
    },
    audio0 => {
        type => 'string',
        format => $audio_fmt,
        description => "Configure a audio device, useful in combination with QXL/Spice.",
        optional => 1
    },
    spice_enhancements => {
        type => 'string',
        format => $spice_enhancements_fmt,
        description => "Configure additional enhancements for SPICE.",
        optional => 1
    },
    tags => {
        type => 'string', format => 'pve-tag-list',
        description => 'Tags of the VM. This is only meta information.',
        optional => 1,
    },
    rng0 => {
        type => 'string',
        format => $rng_fmt,
        description => "Configure a VirtIO-based Random Number Generator.",
        optional => 1,
    },
};
711
# Property-string format of the cloud-init 'cicustom' option. Each key names a custom
# file (given as a volume ID) that replaces the corresponding generated cloud-init data.
#
# All descriptions use single-quoted literals on every concatenated line; mixing quote
# characters between the lines would embed the quote, the line break and the '.' operator
# into the description text.
my $cicustom_fmt = {
    meta => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all meta data passed to the VM via'
            .' cloud-init. This is provider specific meaning configdrive2 and nocloud differ.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    network => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all network data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    user => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all user data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
};
PVE::JSONSchema::register_format('pve-qm-cicustom', $cicustom_fmt);
739
# Schema of the cloud-init related VM configuration options. These entries are merged
# into $confdesc later in the file.
#
# Every multi-line description opens and closes each concatenated literal with the same
# quote character, so that no stray quote, line break or '.' ends up inside the text.
my $confdesc_cloudinit = {
    citype => {
        optional => 1,
        type => 'string',
        description => 'Specifies the cloud-init configuration format. The default depends on the'
            .' configured operating system type (`ostype`). We use the `nocloud` format for Linux,'
            .' and `configdrive2` for windows.',
        enum => ['configdrive2', 'nocloud', 'opennebula'],
    },
    ciuser => {
        optional => 1,
        type => 'string',
        description => "cloud-init: User name to change ssh keys and password for instead of the"
            ." image's configured default user.",
    },
    cipassword => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Password to assign the user. Using this is generally not'
            .' recommended. Use ssh keys instead. Also note that older cloud-init versions do not'
            .' support hashed passwords.',
    },
    cicustom => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Specify custom files to replace the automatically generated'
            .' ones at start.',
        format => 'pve-qm-cicustom',
    },
    searchdomain => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Sets DNS search domains for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    nameserver => {
        optional => 1,
        type => 'string', format => 'address-list',
        description => 'cloud-init: Sets DNS server IP address for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    sshkeys => {
        optional => 1,
        type => 'string',
        format => 'urlencoded',
        description => "cloud-init: Setup public SSH keys (one key per line, OpenSSH format).",
    },
};
790
791 # what about other qemu settings ?
792 #cpu => 'string',
793 #machine => 'string',
794 #fda => 'file',
795 #fdb => 'file',
796 #mtdblock => 'file',
797 #sd => 'file',
798 #pflash => 'file',
799 #snapshot => 'bool',
800 #bootp => 'file',
801 ##tftp => 'dir',
802 ##smb => 'dir',
803 #kernel => 'file',
804 #append => 'string',
805 #initrd => 'file',
806 ##soundhw => 'string',
807
# Publish every option currently present in $confdesc as the standard option
# "pve-qm-<option name>".
for my $opt (keys %$confdesc) {
    PVE::JSONSchema::register_standard_option("pve-qm-$opt", $confdesc->{$opt});
}
811
# Upper bounds for the indexed configuration options.
my $MAX_USB_DEVICES    = 5;
my $MAX_NETS           = 32;
my $MAX_SERIAL_PORTS   = 4;
my $MAX_PARALLEL_PORTS = 3;
my $MAX_NUMA           = 8;

# Property-string format of the numaN options. 'cpus' and 'hostnodes' take a
# ';'-separated list of ids or id ranges (e.g. "0-3;8").
my $numa_fmt = {
    cpus => {
        type => "string",
        format_description => "id[-id];...",
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        description => "CPUs accessing this NUMA node.",
    },
    memory => {
        type => "number",
        optional => 1,
        description => "Amount of memory this NUMA node provides.",
    },
    hostnodes => {
        type => "string",
        format_description => "id[-id];...",
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        optional => 1,
        description => "Host NUMA nodes to use.",
    },
    policy => {
        type => 'string',
        enum => ['preferred', 'bind', 'interleave'],
        optional => 1,
        description => "NUMA allocation policy.",
    },
};
PVE::JSONSchema::register_format('pve-qm-numanode', $numa_fmt);

my $numadesc = {
    type => 'string',
    format => $numa_fmt,
    optional => 1,
    description => "NUMA topology.",
};
PVE::JSONSchema::register_standard_option("pve-qm-numanode", $numadesc);

# numa0 .. numa<$MAX_NUMA - 1> all share the very same schema hash.
for my $i (0 .. $MAX_NUMA - 1) {
    $confdesc->{"numa$i"} = $numadesc;
}
855
# Network card models accepted for the 'model' key of a netN option.
my $nic_model_list = [qw(
    e1000
    e1000-82540em
    e1000-82544gc
    e1000-82545em
    e1000e
    i82551
    i82557b
    i82559er
    ne2k_isa
    ne2k_pci
    pcnet
    rtl8139
    virtio
    vmxnet3
)];

# The same models, sorted and joined with single spaces.
my $nic_model_list_txt = join(' ', sort @{$nic_model_list});
873
# Long description of the 'bridge' key, kept separate because of its length.
my $net_fmt_bridge_descr = <<__EOD__;
Bridge to attach the network device to. The Proxmox VE standard bridge
is called 'vmbr0'.

If you do not specify a bridge, we create a kvm user (NATed) network
device, which provides DHCP and DNS services. The following addresses
are used:

10.0.2.2 Gateway
10.0.2.3 DNS Server
10.0.2.4 SMB Server

The DHCP server assign addresses to the guest starting from 10.0.2.15.
__EOD__

# Property-string format of the netN options; 'model' is the default key.
my $net_fmt = {
    macaddr => get_standard_option('mac-addr', {
        description => "MAC address. That address must be unique withing your network. This is"
            . " automatically generated if not specified.",
    }),
    model => {
        type => 'string',
        enum => $nic_model_list,
        default_key => 1,
        description => "Network Card Model. The 'virtio' model provides the best performance with"
            . " very low CPU overhead. If your guest does not support this driver, it is usually"
            . " best to use 'e1000'.",
    },
    # One alias entry per NIC model name: the key is an alias for 'model' and its value is
    # aliased to 'macaddr'.
    (map { $_ => { keyAlias => 'model', alias => 'macaddr' } } @{$nic_model_list}),
    bridge => {
        type => 'string',
        format_description => 'bridge',
        pattern => '[-_.\w\d]+',
        optional => 1,
        description => $net_fmt_bridge_descr,
    },
    queues => {
        type => 'integer',
        minimum => 0,
        maximum => 16,
        optional => 1,
        description => 'Number of packet queues to be used on the device.',
    },
    rate => {
        type => 'number',
        minimum => 0,
        optional => 1,
        description => "Rate limit in mbps (megabytes per second) as floating point number.",
    },
    tag => {
        type => 'integer',
        minimum => 1,
        maximum => 4094,
        optional => 1,
        description => 'VLAN tag to apply to packets on this interface.',
    },
    trunks => {
        type => 'string',
        format_description => 'vlanid[;vlanid...]',
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        optional => 1,
        description => 'VLAN trunks to pass through this interface.',
    },
    firewall => {
        type => 'boolean',
        optional => 1,
        description => 'Whether this interface should be protected by the firewall.',
    },
    link_down => {
        type => 'boolean',
        optional => 1,
        description => 'Whether this interface should be disconnected (like pulling the plug).',
    },
    mtu => {
        type => 'integer',
        minimum => 1,
        maximum => 65520,
        optional => 1,
        description => "Force MTU, for VirtIO only. Set to '1' to use the bridge MTU",
    },
};

# Schema shared by all netN options.
my $netdesc = {
    type => 'string',
    format => $net_fmt,
    optional => 1,
    description => "Specify network devices.",
};

PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
960
# Property-string format of the cloud-init ipconfigN options. A gateway key requires
# the address key of the same IP family ('gw' requires 'ip', 'gw6' requires 'ip6').
my $ipconfig_fmt = {
    ip => {
        type => 'string',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        description => 'IPv4 address in CIDR format.',
        optional => 1,
        default => 'dhcp',
    },
    gw => {
        type => 'string',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        description => 'Default gateway for IPv4 traffic.',
        optional => 1,
        requires => 'ip',
    },
    ip6 => {
        type => 'string',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        description => 'IPv6 address in CIDR format.',
        optional => 1,
        default => 'dhcp',
    },
    gw6 => {
        type => 'string',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        description => 'Default gateway for IPv6 traffic.',
        optional => 1,
        requires => 'ip6',
    },
};
PVE::JSONSchema::register_format('pve-qm-ipconfig', $ipconfig_fmt);

# Schema shared by all ipconfigN options.
my $ipconfigdesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-ipconfig',
    description => <<'EODESCR',
cloud-init: Specify IP addresses and gateways for the corresponding interface.

IP addresses use CIDR notation, gateways are optional but need an IP of the same type specified.

The special string 'dhcp' can be used for IP addresses to use DHCP, in which case no explicit
gateway should be provided.
For IPv6 the special string 'auto' can be used to use stateless autoconfiguration. This requires
cloud-init 19.4 or newer.

If cloud-init is enabled and neither an IPv4 nor an IPv6 address is specified, it defaults to using
dhcp on IPv4.
EODESCR
};
# The standard option must describe the ipconfig schema defined right above, not the
# schema of the network device options.
PVE::JSONSchema::register_standard_option("pve-qm-ipconfig", $ipconfigdesc);
1014
# One 'net<N>' and one cloud-init 'ipconfig<N>' entry per possible NIC index.
for my $idx (0 .. $MAX_NETS - 1) {
    $confdesc->{"net$idx"} = $netdesc;
    $confdesc_cloudinit->{"ipconfig$idx"} = $ipconfigdesc;
}

# All cloud-init options are also regular VM config options.
for my $ci_key (keys %$confdesc_cloudinit) {
    $confdesc->{$ci_key} = $confdesc_cloudinit->{$ci_key};
}
1023
PVE::JSONSchema::register_format('pve-volume-id-or-qm-path', \&verify_volume_id_or_qm_path);

# Format verifier: accepts the keywords 'none' and 'cdrom', anything starting
# with '/', or a value that passes the 'pve-volume-id' format check.
# Returns the (checked) value; on failure returns nothing if $noerr is set,
# otherwise re-throws the check error.
sub verify_volume_id_or_qm_path {
    my ($volid, $noerr) = @_;

    # keywords and absolute paths are passed through untouched
    for my $keyword (qw(none cdrom)) {
        return $volid if $volid eq $keyword;
    }
    return $volid if $volid =~ m|^/|;

    # everything else has to be a volume ID
    my $checked = eval { PVE::JSONSchema::check_format('pve-volume-id', $volid, '') };
    if (my $err = $@) {
        return if $noerr;
        die $err;
    }
    return $checked;
}
1040
# Property-string format for the 'usb<N>' config keys.
my $usb_fmt = {
    host => {
        default_key => 1,
        type => 'string', format => 'pve-qm-usb-device',
        format_description => 'HOSTUSBDEVICE|spice',
        description => <<EODESCR,
The Host USB device or port or the value 'spice'. HOSTUSBDEVICE syntax is:

'bus-port(.port)*' (decimal numbers) or
'vendor_id:product_id' (hexadecimal numbers) or
'spice'

You can use the 'lsusb -t' command to list existing usb devices.

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

The value 'spice' can be used to add a usb redirection device for spice.
EODESCR
    },
    usb3 => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether the given host option is a USB3 device or port.",
        default => 0,
    },
};

# Schema entry used for the 'usb<N>' config keys.
my $usbdesc = {
    optional => 1,
    type => 'string', format => $usb_fmt,
    description => "Configure an USB device (n is 0 to 4).",
};
PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc);
1075
# Schema entry used for the 'serial<N>' config keys: either a host device
# below /dev/ or the keyword 'socket'.
my $serialdesc = {
    optional => 1,
    type => 'string',
    pattern => '(/dev/.+|socket)',
    description => "Create a serial device inside the VM (n is 0 to 3)",
    verbose_description => <<EODESCR,
Create a serial device inside the VM (n is 0 to 3), and pass through a
host serial device (i.e. /dev/ttyS0), or create a unix socket on the
host side (use 'qm terminal' to open a terminal connection).

NOTE: If you pass through a host serial device, it is no longer possible to migrate such machines -
use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};

# Schema entry used for the 'parallel<N>' config keys.
my $paralleldesc= {
    optional => 1,
    type => 'string',
    # The alternation is grouped (like the serial pattern above) so that anchors
    # added around the pattern apply to the whole expression and not just to
    # the first/last alternative.
    pattern => '(?:/dev/parport\d+|/dev/usb/lp\d+)',
    description => "Map host parallel devices (n is 0 to 2).",
    verbose_description => <<EODESCR,
Map host parallel devices (n is 0 to 2).

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1107
# Register the indexed passthrough/device keys in the VM config schema.
for my $idx (0 .. $MAX_PARALLEL_PORTS - 1) {
    $confdesc->{"parallel$idx"} = $paralleldesc;
}

for my $idx (0 .. $MAX_SERIAL_PORTS - 1) {
    $confdesc->{"serial$idx"} = $serialdesc;
}

for my $idx (0 .. $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES - 1) {
    $confdesc->{"hostpci$idx"} = $PVE::QemuServer::PCI::hostpcidesc;
}

# drive keys come pre-built from the Drive module
my $drive_descs = $PVE::QemuServer::Drive::drivedesc_hash;
for my $drive_key (keys %$drive_descs) {
    $confdesc->{$drive_key} = $drive_descs->{$drive_key};
}

for my $idx (0 .. $MAX_USB_DEVICES - 1) {
    $confdesc->{"usb$idx"} = $usbdesc;
}
1127
# Property-string format of the 'boot' option: the deprecated single-letter
# 'legacy' list and the explicit device 'order' list.
my $boot_fmt = {
    legacy => {
        type => 'string',
        pattern => '[acdn]{1,4}',
        format_description => "[acdn]{1,4}",
        description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n). Deprecated, use 'order=' instead.",
        default_key => 1,
        optional => 1,

        # note: this is also the fallback if boot: is not given at all
        default => 'cdn',
    },
    order => {
        type => 'string',
        format => 'pve-qm-bootdev-list',
        format_description => "device[;device...]",
        optional => 1,
        description => <<EODESC,
The guest will attempt to boot from devices in the order they appear here.

Disks, optical drives and passed-through storage USB devices will be directly
booted from, NICs will load PXE, and PCIe devices will either behave like disks
(e.g. NVMe) or load an option ROM (e.g. RAID controller, hardware NIC).

Note that only devices in this list will be marked as bootable and thus loaded
by the guest firmware (BIOS/UEFI). If you require multiple disks for booting
(e.g. software-raid), you need to specify all of them here.

Overrides the deprecated 'legacy=[acdn]*' value when given.
EODESC
    },
};
PVE::JSONSchema::register_format('pve-qm-boot', $boot_fmt);
1162
PVE::JSONSchema::register_format('pve-qm-bootdev', \&verify_bootdev);

# Format verifier for a single boot device name. Accepts valid drive names
# (except efidisk*/tpmstate*) and net<N>/usb<N>/hostpci<N> keys known to the
# config schema. Returns $dev; on failure returns nothing if $noerr is set,
# otherwise dies.
sub verify_bootdev {
    my ($dev, $noerr) = @_;

    # EFI vars and TPM state disks are drives, but not something to boot from
    my $special = $dev =~ m/^(?:efidisk|tpmstate)/;
    return $dev if PVE::QemuServer::Drive::is_valid_drivename($dev) && !$special;

    for my $base (qw(net usb hostpci)) {
        return $dev if $dev =~ m/^$base\d+$/ && $confdesc->{$dev};
    }

    return if $noerr;
    die "invalid boot device '$dev'\n";
}
1184
# Serializes a list of boot devices into a 'boot' property string
# ('order=dev1;dev2;...'); an empty list yields an empty string.
sub print_bootorder {
    my ($devs) = @_;

    return "" unless @$devs;

    my $order = join(';', @$devs);
    return PVE::JSONSchema::print_property_string({ order => $order }, $boot_fmt);
}
1191
# cached result of the KVM_GET_API_VERSION ioctl (0 = not queried yet)
my $kvm_api_version = 0;

# Returns the KVM API version reported by /dev/kvm, caching the first truthy
# answer. Returns nothing if /dev/kvm cannot be opened.
sub kvm_version {
    if (!$kvm_api_version) {
        open(my $kvm_dev, '<', '/dev/kvm') or return;

        # 0xae00 => KVM_GET_API_VERSION
        $kvm_api_version = ioctl($kvm_dev, 0xae00, 0);
        close($kvm_dev);
    }

    return $kvm_api_version;
}
1205
# Per-binary caches: the detected version string, and the mtime the binary
# had when that version was detected.
my $kvm_user_version = {};
my $kvm_mtime = {};

# Returns the version printed by `$binary --version` (e.g. '6.1.0'), or
# 'unknown' if the output could not be parsed. Results are cached per binary
# and refreshed when the binary's mtime changes.
# Dies if the binary cannot be stat'ed.
sub kvm_user_version {
    my ($binary) = @_;

    $binary //= get_command_for_arch(get_host_arch()); # get the native arch by default

    # stat() yields nothing for a missing/inaccessible file; fail with a clear
    # message instead of a method call on an undefined value
    my $st = stat($binary) or die "failed to stat '$binary' - $!\n";

    my $cachedmtime = $kvm_mtime->{$binary} // -1;
    return $kvm_user_version->{$binary} if $kvm_user_version->{$binary} &&
        $cachedmtime == $st->mtime;

    $kvm_user_version->{$binary} = 'unknown';
    $kvm_mtime->{$binary} = $st->mtime;

    my $code = sub {
        my $line = shift;
        if ($line =~ m/^QEMU( PC)? emulator version (\d+\.\d+(\.\d+)?)(\.\d+)?[,\s]/) {
            $kvm_user_version->{$binary} = $2;
        }
    };

    # a failing invocation only warns; the cached value then stays 'unknown'
    eval { run_command([$binary, '--version'], outfunc => $code); };
    warn $@ if $@;

    return $kvm_user_version->{$binary};
}
# Thin wrapper around PVE::QemuServer::Machine::extract_version that fills in
# the installed QEMU version when the caller passes none.
my sub extract_version {
    my ($machine_type, $version) = @_;

    $version //= kvm_user_version();
    return PVE::QemuServer::Machine::extract_version($machine_type, $version);
}
1240
# True if the vhost-net character device is present on the host.
sub kernel_has_vhost_net {
    my $vhost_dev = '/dev/vhost-net';
    return -c $vhost_dev;
}
1244
# True if $key is a known VM config option (has an entry in $confdesc).
sub option_exists {
    my ($key) = @_;

    return defined($confdesc->{$key});
}
1249
# cached host CD-ROM device path, set on first successful lookup
my $cdrom_path;

# Returns the first of /dev/cdrom, /dev/cdrom1, /dev/cdrom2 that is a symlink
# and remembers it for later calls.
sub get_cdrom_path {
    return $cdrom_path if $cdrom_path;

    for my $candidate ('/dev/cdrom', '/dev/cdrom1') {
        return $cdrom_path = $candidate if -l $candidate;
    }

    # deliberately the last statement: without a match the result is whatever
    # this final file test evaluates to
    return $cdrom_path = "/dev/cdrom2" if -l "/dev/cdrom2";
}
1259
# Resolves the 'file' value of a CD-ROM drive to a path:
#   'cdrom'  -> host CD-ROM device (see get_cdrom_path)
#   'none'   -> empty string
#   '/...'   -> used as is
#   other    -> treated as volume ID and resolved through PVE::Storage::path
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';
    return '' if $cdrom eq 'none';
    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path($storecfg, $cdrom);
}
1273
# Try to convert old style file names to volume IDs.
#
# 'none', 'cdrom', /dev/ paths and anything of the form 'storage:volume' are
# returned unchanged. Other names containing a '/' are rejected (returns
# nothing). Plain names are mapped onto the 'local' storage: 'local:iso/<file>'
# for cdrom media, 'local:<vmid>/<file>' otherwise.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    my $is_keyword = $file eq 'none' || $file eq 'cdrom';
    return $file if $is_keyword || $file =~ m|^/dev/.+| || $file =~ m/^([^:]+):(.+)$/;

    return if $file =~ m|/|;

    my $is_cdrom = $media && $media eq 'cdrom';
    return $is_cdrom ? "local:iso/$file" : "local:$vmid/$file";
}
1292
# Checks that a volume's content type matches the drive's media setting
# ('disk' expects 'images', 'cdrom' expects 'iso'). Does nothing when no media
# is set; raises a parameter exception for $opt on mismatch and dies with
# "internal error" for any other media value.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my %expected_type_for = (
        disk => 'images',
        cdrom => 'iso',
    );
    my $etype = $expected_type_for{$media} // die "internal error";

    return if $vtype eq $etype;

    raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
}
1311
# Normalizes $drive->{file} in place: a value that is neither 'cdrom'/'none',
# a /dev/ path, a 'storage:volume' ID nor a plain number is treated as a
# filesystem path and converted to a volume ID via the storage layer (raising
# a parameter exception for $opt if no storage matches). Also defaults
# $drive->{media} to 'cdrom' for ISO volumes and for 'cdrom'/'none'.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    my $file = $drive->{file};

    # try to convert filesystem paths to volume IDs
    my $is_fs_path = $file !~ m/^(cdrom|none)$/
        && $file !~ m|^/dev/.+|
        && $file !~ m/^([^:]+):(.+)$/
        && $file !~ m/^\d+$/;

    if ($is_fs_path) {
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $file);
        raise_param_exc({ $opt => "unable to associate path '$file' to any storage"})
            if !$vtype;
        $drive->{media} = 'cdrom' if !$drive->{media} && $vtype eq 'iso';
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    $drive->{media} = 'cdrom' if !$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/;
}
1331
# Parses the 'hotplug' option into a hash of enabled features
# (network, disk, cpu, memory, usb). '0' disables everything, '1' selects the
# schema default list. Dies on an unknown feature name.
sub parse_hotplug_features {
    my ($data) = @_;

    return {} if $data eq '0';

    # '1' is shorthand for the default feature list from the schema
    my $list = $data eq '1' ? $confdesc->{hotplug}->{default} : $data;

    my $res = {};
    for my $feature (PVE::Tools::split_list($list)) {
        my ($name) = $feature =~ m/^(network|disk|cpu|memory|usb)$/
            or die "invalid hotplug feature '$feature'\n";
        $res->{$name} = 1;
    }
    return $res;
}
1350
PVE::JSONSchema::register_format('pve-hotplug-features', \&pve_verify_hotplug_features);

# Format verifier for the 'hotplug' option. Returns $value if it parses; on
# failure returns nothing when $noerr is set, otherwise dies with the parser's
# error message.
sub pve_verify_hotplug_features {
    my ($value, $noerr) = @_;

    # parse_hotplug_features() dies on invalid input, so it has to be guarded
    # for $noerr to have any effect
    my $parsed = eval { parse_hotplug_features($value) };
    my $err = $@;
    return $value if $parsed;

    return if $noerr;

    die $err if $err;
    die "unable to parse hotplug option\n";
}
1361
# Sends a SCSI INQUIRY (opcode 0x12) to the device behind $fh through the
# SG_IO ioctl and returns a hash with vendor, product, revision, removable
# and type. On any failure it dies, or returns nothing when $noerr is set.
sub scsi_inquiry {
    my($fh, $noerr) = @_;

    my $SG_IO = 0x2285;
    my $SG_GET_VERSION_NUM = 0x2282;

    my $versionbuf = "\x00" x 8;
    my $ret = ioctl($fh, $SG_GET_VERSION_NUM, $versionbuf);
    if (!$ret) {
        die "scsi ioctl SG_GET_VERSION_NUM failed - $!\n" if !$noerr;
        return;
    }
    my $version = unpack("I", $versionbuf);
    if ($version < 30000) {
        die "scsi generic interface too old\n" if !$noerr;
        return;
    }

    # 36 byte response buffer, small sense buffer, 6 byte command block
    my $buf = "\x00" x 36;
    my $sensebuf = "\x00" x 8;
    my $cmd = pack("C x3 C x1", 0x12, 36);

    # see /usr/include/scsi/sg.h
    my $sg_io_hdr_t = "i i C C s I P P P I I i P C C C C S S i I I";

    my $packet = pack($sg_io_hdr_t, ord('S'), -3, length($cmd),
        length($sensebuf), 0, length($buf), $buf,
        $cmd, $sensebuf, 6000);

    $ret = ioctl($fh, $SG_IO, $packet);
    if (!$ret) {
        die "scsi ioctl SG_IO failed - $!\n" if !$noerr;
        return;
    }

    # fields 17 and 18 are the two 'S' entries of the template
    # (presumably host_status/driver_status - confirm against sg.h)
    my @res = unpack($sg_io_hdr_t, $packet);
    if ($res[17] || $res[18]) {
        die "scsi ioctl SG_IO status error - $!\n" if !$noerr;
        return;
    }

    my $res = {};
    (my $byte0, my $byte1, $res->{vendor},
        $res->{product}, $res->{revision}) = unpack("C C x6 A8 A16 A4", $buf);

    # top bit of byte 1 -> removable flag, low 5 bits of byte 0 -> device type
    $res->{removable} = $byte1 & 128 ? 1 : 0;
    $res->{type} = $byte0 & 31;

    return $res;
}
1412
# Opens $path read/write and runs a non-fatal SCSI inquiry on it. Returns the
# inquiry result hash, or nothing if the path cannot be opened or the inquiry
# fails.
sub path_is_scsi {
    my ($path) = @_;

    # pass the mode separately so characters in $path can never be interpreted
    # as part of the open mode
    my $fh = IO::File->new($path, '+<') || return;
    my $res = scsi_inquiry($fh, 1);
    close($fh);

    return $res;
}
1422
# Builds the '-device' string for the USB tablet, attached to port 1 of the
# EHCI bus on q35 machines and on aarch64, and of the UHCI bus otherwise.
sub print_tabletdevice_full {
    my ($conf, $arch) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # we use uhci for old VMs because tablet driver was buggy in older qemu
    my $usbbus = ($q35 || $arch eq 'aarch64') ? 'ehci' : 'uhci';

    return "usb-tablet,id=tablet,bus=$usbbus.0,port=1";
}
1438
# Builds the '-device' string for a USB keyboard on EHCI port 2. Only aarch64
# gets one; for every other architecture nothing is returned.
sub print_keyboarddevice_full {
    my ($conf, $arch, $machine) = @_;

    if ($arch eq 'aarch64') {
        return "usb-kbd,id=keyboard,bus=ehci.0,port=2";
    }

    return;
}
1446
# Drive identifier as used in device/drive ids: interface name directly
# followed by the index (e.g. 'scsi0').
my sub get_drive_id {
    my ($drive) = @_;

    return $drive->{interface} . $drive->{index};
}
1451
# Builds the QEMU '-device' string for one guest drive.
#
# Handles the virtio, scsi, ide and sata interfaces; 'usb' is not implemented
# and any other interface is rejected (both die). The resulting device
# references the block backend 'drive-<id>' and carries the optional
# bootindex and serial properties.
sub print_drivedevice_full {
    my ($storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type) = @_;

    my $device = '';

    my $drive_id = get_drive_id($drive);
    if ($drive->{interface} eq 'virtio') {
        my $pciaddr = print_pci_addr("$drive_id", $bridges, $arch, $machine_type);
        $device = "virtio-blk-pci,drive=drive-$drive_id,id=${drive_id}${pciaddr}";
        $device .= ",iothread=iothread-$drive_id" if $drive->{iothread};
    } elsif ($drive->{interface} eq 'scsi') {

        my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = 'hd';
        my $path = '';
        if (drive_is_cdrom($drive)) {
            $devicetype = 'cd';
        } else {
            if ($drive->{file} =~ m|^/|) {
                # host path: ask the device itself what it is
                $path = $drive->{file};
                if (my $info = path_is_scsi($path)) {
                    if ($info->{type} == 0 && $drive->{scsiblock}) {
                        $devicetype = 'block';
                    } elsif ($info->{type} == 1) { # tape
                        $devicetype = 'generic';
                    }
                }
            } else {
                $path = PVE::Storage::path($storecfg, $drive->{file});
            }

            # for compatibility only, we prefer scsi-hd (#2408, #2355, #2380)
            my $version = extract_version($machine_type, kvm_user_version());
            if ($path =~ m/^iscsi\:\/\// &&
                !min_version($version, 4, 1)) {
                $devicetype = 'generic';
            }
        }

        # lsi (also the fallback when no scsihw is set) and pvscsi address
        # disks by scsi-id, everything else by lun on a fixed scsi-id
        if (!$conf->{scsihw} || $conf->{scsihw} =~ m/^lsi/ || $conf->{scsihw} eq 'pvscsi') {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,scsi-id=$unit";
        } else {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,channel=0,scsi-id=0"
                .",lun=$drive->{index}";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($drive->{ssd} && ($devicetype eq 'block' || $devicetype eq 'hd')) {
            $device .= ",rotation_rate=1";
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};

    } elsif ($drive->{interface} eq 'ide' || $drive->{interface} eq 'sata') {
        my $maxdev = ($drive->{interface} eq 'sata') ? $PVE::QemuServer::Drive::MAX_SATA_DISKS : 2;
        my $controller = int($drive->{index} / $maxdev);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = ($drive->{media} && $drive->{media} eq 'cdrom') ? "cd" : "hd";

        $device = "ide-$devicetype";
        if ($drive->{interface} eq 'ide') {
            $device .= ",bus=ide.$controller,unit=$unit";
        } else {
            $device .= ",bus=ahci$controller.$unit";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($devicetype eq 'hd') {
            if (my $model = $drive->{model}) {
                $model = URI::Escape::uri_unescape($model);
                $device .= ",model=$model";
            }
            if ($drive->{ssd}) {
                $device .= ",rotation_rate=1";
            }
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};
    } elsif ($drive->{interface} eq 'usb') {
        die "implement me";
        # -device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0
    } else {
        die "unsupported interface type";
    }

    $device .= ",bootindex=$drive->{bootindex}" if $drive->{bootindex};

    # the serial is stored URI-escaped in the config
    if (my $serial = $drive->{serial}) {
        $serial = URI::Escape::uri_unescape($serial);
        $device .= ",serial=$serial";
    }

    return $device;
}
1547
# Reads the first 'InitiatorName=...' value from
# /etc/iscsi/initiatorname.iscsi. Returns nothing if the file cannot be
# opened, and undef if it has no such line.
sub get_initiator_name {
    my $fh = IO::File->new('/etc/iscsi/initiatorname.iscsi') || return;

    my $initiator;
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\s*InitiatorName\s*=\s*([\.\-:\w]+)/) {
            $initiator = $1;
            last;
        }
    }
    $fh->close();

    return $initiator;
}
1561
# Builds the QEMU '-drive' option string for one guest drive.
#
# $pbs_name, when given, names a PBS block node used as backing: the drive is
# then wrapped in an 'alloc-track' format node. $io_uring tells whether
# io_uring may be used as default AIO backend.
# Dies if a CD-ROM drive is combined with $pbs_name, or if the format cannot
# be determined for a PBS-backed drive.
sub print_drive_commandline_full {
    my ($storecfg, $vmid, $drive, $pbs_name, $io_uring) = @_;

    my $path;
    my $volid = $drive->{file};
    my $format = $drive->{format};
    my $drive_id = get_drive_id($drive);

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
    my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;

    if (drive_is_cdrom($drive)) {
        $path = get_iso_path($storecfg, $vmid, $volid);
        die "$drive_id: cannot back cdrom drive with PBS snapshot\n" if $pbs_name;
    } else {
        if ($storeid) {
            $path = PVE::Storage::path($storecfg, $volid);
            $format //= qemu_img_format($scfg, $volname);
        } else {
            $path = $volid;
            $format //= "raw";
        }
    }

    # the path may be undefined for a CD-ROM drive without a host device
    my $is_rbd = defined($path) && $path =~ m/^rbd:/;

    my $opts = '';
    my @qemu_drive_options = qw(heads secs cyls trans media cache rerror werror aio discard);
    foreach my $o (@qemu_drive_options) {
        $opts .= ",$o=$drive->{$o}" if defined($drive->{$o});
    }

    # snapshot only accepts on|off
    if (defined($drive->{snapshot})) {
        my $v = $drive->{snapshot} ? 'on' : 'off';
        $opts .= ",snapshot=$v";
    }

    if (defined($drive->{ro})) { # ro maps to QEMUs `readonly`, which accepts `on` or `off` only
        $opts .= ",readonly=" . ($drive->{ro} ? 'on' : 'off');
    }

    # bandwidth (configured in MB/s, passed in bytes/s) and IOPS limits for the
    # total, read and write direction
    foreach my $type (['', '-total'], [_rd => '-read'], [_wr => '-write']) {
        my ($dir, $qmpname) = @$type;
        if (my $v = $drive->{"mbps$dir"}) {
            $opts .= ",throttling.bps$qmpname=".int($v*1024*1024);
        }
        if (my $v = $drive->{"mbps${dir}_max"}) {
            $opts .= ",throttling.bps$qmpname-max=".int($v*1024*1024);
        }
        if (my $v = $drive->{"bps${dir}_max_length"}) {
            $opts .= ",throttling.bps$qmpname-max-length=$v";
        }
        if (my $v = $drive->{"iops${dir}"}) {
            $opts .= ",throttling.iops$qmpname=$v";
        }
        if (my $v = $drive->{"iops${dir}_max"}) {
            $opts .= ",throttling.iops$qmpname-max=$v";
        }
        if (my $v = $drive->{"iops${dir}_max_length"}) {
            $opts .= ",throttling.iops$qmpname-max-length=$v";
        }
    }

    if ($pbs_name) {
        $format = "rbd" if $is_rbd;
        die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n"
            if !$format;
        $opts .= ",format=alloc-track,file.driver=$format";
    } elsif ($format) {
        $opts .= ",format=$format";
    }

    my $cache_direct = 0;

    if (my $cache = $drive->{cache}) {
        $cache_direct = $cache =~ /^(?:off|none|directsync)$/;
    } elsif (!drive_is_cdrom($drive) && !($scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow})) {
        $opts .= ",cache=none";
        $cache_direct = 1;
    }

    # io_uring with cache mode writeback or writethrough on krbd will hang...
    my $rbd_no_io_uring = $scfg && $scfg->{type} eq 'rbd' && $scfg->{krbd} && !$cache_direct;

    # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
    # sometimes, just plain disable...
    my $lvm_no_io_uring = $scfg && $scfg->{type} eq 'lvm';

    # only pick an AIO backend when the user did not configure one
    if (!$drive->{aio}) {
        if ($io_uring && !$rbd_no_io_uring && !$lvm_no_io_uring) {
            # io_uring supports all cache modes
            $opts .= ",aio=io_uring";
        } else {
            # aio native works only with O_DIRECT
            if($cache_direct) {
                $opts .= ",aio=native";
            } else {
                $opts .= ",aio=threads";
            }
        }
    }

    if (!drive_is_cdrom($drive)) {
        my $detectzeroes;
        if (defined($drive->{detect_zeroes}) && !$drive->{detect_zeroes}) {
            $detectzeroes = 'off';
        } elsif ($drive->{discard}) {
            $detectzeroes = $drive->{discard} eq 'on' ? 'unmap' : 'on';
        } else {
            # This used to be our default with discard not being specified:
            $detectzeroes = 'on';
        }

        # note: 'detect-zeroes' works per blockdev and we want it to persist
        # after the alloc-track is removed, so put it on 'file' directly
        my $dz_param = $pbs_name ? "file.detect-zeroes" : "detect-zeroes";
        $opts .= ",$dz_param=$detectzeroes" if $detectzeroes;
    }

    if ($pbs_name) {
        $opts .= ",backing=$pbs_name";
        $opts .= ",auto-remove=on";
    }

    my $file_param = "file";
    if ($pbs_name) {
        # non-rbd drivers require the underlying file to be a separate block
        # node, so add a second .file indirection
        $file_param .= ".file" if !$is_rbd;
        $file_param .= ".filename";
    }
    my $pathinfo = $path ? "$file_param=$path," : '';

    return "${pathinfo}if=none,id=drive-${drive_id}${opts}";
}
1699
# Builds the '-blockdev' option string for a read-only PBS block node named
# $pbs_name from the repository/snapshot/archive (and optional keyfile) in
# $pbs_conf.
sub print_pbs_blockdev {
    my ($pbs_conf, $pbs_name) = @_;

    my @props = (
        "driver=pbs",
        "node-name=$pbs_name",
        "read-only=on",
        "repository=$pbs_conf->{repository}",
        "snapshot=$pbs_conf->{snapshot}",
        "archive=$pbs_conf->{archive}",
    );
    push @props, "keyfile=$pbs_conf->{keyfile}" if $pbs_conf->{keyfile};

    return join(',', @props);
}
1709
# Builds the QEMU '-device' string for a guest NIC: model (virtio is mapped to
# virtio-net-pci), MAC, netdev link, PCI address, and optionally multiqueue
# vectors, bootindex, host MTU (VirtIO on a bridge only) and a legacy PXE ROM.
# Dies if the requested MTU is below 576 or above the bridge MTU.
sub print_netdevice_full {
    my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type) = @_;

    my $is_virtio = $net->{model} eq 'virtio';
    my $device = $is_virtio ? 'virtio-net-pci' : $net->{model};

    my $pciaddr = print_pci_addr("$netid", $bridges, $arch, $machine_type);
    my $tmpstr = "$device,mac=$net->{macaddr},netdev=$netid$pciaddr,id=$netid";

    if ($net->{queues} && $net->{queues} > 1 && $is_virtio) {
        # Consider we have N queues, the number of vectors needed is 2 * N + 2, i.e., one per in
        # and out of each queue plus one config interrupt and control vector queue
        my $vectors = $net->{queues} * 2 + 2;
        $tmpstr .= ",vectors=$vectors,mq=on";
    }

    if ($net->{bootindex}) {
        $tmpstr .= ",bootindex=$net->{bootindex}";
    }

    if (my $mtu = $net->{mtu}) {
        if (!$is_virtio || !$net->{bridge}) {
            warn "WARN: netdev $netid: ignoring MTU '$mtu', not using VirtIO or no bridge configured.\n";
        } else {
            my $bridge_mtu = PVE::Network::read_bridge_mtu($net->{bridge});
            if ($mtu == 1) {
                # '1' means: inherit the MTU of the bridge
                $mtu = $bridge_mtu;
            } elsif ($mtu < 576) {
                die "netdev $netid: MTU '$mtu' is smaller than the IP minimum MTU '576'\n";
            } elsif ($mtu > $bridge_mtu) {
                die "netdev $netid: MTU '$mtu' is bigger than the bridge MTU '$bridge_mtu'\n";
            }
            $tmpstr .= ",host_mtu=$mtu";
        }
    }

    if ($use_old_bios_files) {
        # legacy PXE ROM per device model
        my %romfile_for = (
            'virtio-net-pci' => 'pxe-virtio.rom',
            'e1000' => 'pxe-e1000.rom',
            'e1000e' => 'pxe-e1000e.rom',
            'ne2k' => 'pxe-ne2k_pci.rom',
            'pcnet' => 'pxe-pcnet.rom',
            'rtl8139' => 'pxe-rtl8139.rom',
        );
        my $romfile = $romfile_for{$device};
        $tmpstr .= ",romfile=$romfile" if $romfile;
    }

    return $tmpstr;
}
1764
# Builds the QEMU '-netdev' string for NIC $netid ('net<N>'): a tap device
# 'tap<vmid>i<N>' with the PVE bridge scripts when a bridge is configured,
# user-mode networking otherwise.
# Dies if $netid is not of the form 'net<N>', if N is not below $MAX_NETS, or
# if the resulting interface name is too long.
sub print_netdev_full {
    my ($vmid, $conf, $arch, $net, $netid, $hotplug) = @_;

    # reject malformed ids outright instead of continuing with an empty index
    my ($index) = $netid =~ m/^net(\d+)$/
        or die "got strange net id '$netid'\n";
    my $i = int($index);

    die "got strange net id '$i'\n" if $i >= ${MAX_NETS};

    my $ifname = "tap${vmid}i$i";

    # kvm uses TUNSETIFF ioctl, and that limits ifname length
    die "interface name '$ifname' is too long (max 15 character)\n"
        if length($ifname) >= 16;

    # vhost is only enabled for VirtIO NICs on a native-architecture guest
    my $vhostparam = '';
    if (is_native($arch)) {
        $vhostparam = ',vhost=on' if kernel_has_vhost_net() && $net->{model} eq 'virtio';
    }

    my $vmname = $conf->{name} || "vm$vmid";

    my $netdev = "";
    my $script = $hotplug ? "pve-bridge-hotplug" : "pve-bridge";

    if ($net->{bridge}) {
        $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/$script"
            .",downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam";
    } else {
        $netdev = "type=user,id=$netid,hostname=$vmname";
    }

    $netdev .= ",queues=$net->{queues}" if ($net->{queues} && $net->{model} eq 'virtio');

    return $netdev;
}
1802
# config 'vga' type => QEMU device name
my $vga_map = {
    'cirrus' => 'cirrus-vga',
    'std' => 'VGA',
    'vmware' => 'vmware-svga',
    'virtio' => 'virtio-vga',
};

# Builds the QEMU '-device' string for a display adapter.
#
# $qxlnum selects a QXL device ('qxl-vga' for the first, 'qxl' when $id is
# set); otherwise the type comes from $vga_map, with virtio mapped to
# virtio-gpu on aarch64. $id, if given, is appended to the device id 'vga'.
# Dies if no device type can be determined for $vga->{type}.
sub print_vga_device {
    my ($conf, $vga, $arch, $machine_version, $machine, $id, $qxlnum, $bridges) = @_;

    my $type = $vga_map->{$vga->{type}};
    if ($arch eq 'aarch64' && defined($type) && $type eq 'virtio-vga') {
        $type = 'virtio-gpu';
    }
    my $vgamem_mb = $vga->{memory};

    my $max_outputs = '';
    if ($qxlnum) {
        $type = $id ? 'qxl' : 'qxl-vga';

        # the alternation is grouped so both anchors apply to every alternative
        if (!$conf->{ostype} || $conf->{ostype} =~ m/^(?:l\d\d|other)$/) {
            # set max outputs so linux can have up to 4 qxl displays with one device
            if (min_version($machine_version, 4, 1)) {
                $max_outputs = ",max_outputs=4";
            }
        }
    }

    die "no devicetype for $vga->{type}\n" if !$type;

    my $memory = "";
    if ($vgamem_mb) {
        if ($vga->{type} eq 'virtio') {
            my $bytes = PVE::Tools::convert_size($vgamem_mb, "mb" => "b");
            $memory = ",max_hostmem=$bytes";
        } elsif ($qxlnum) {
            # from https://www.spice-space.org/multiple-monitors.html
            $memory = ",vgamem_mb=$vga->{memory}";
            my $ram = $vgamem_mb * 4;
            my $vram = $vgamem_mb * 2;
            $memory .= ",ram_size_mb=$ram,vram_size_mb=$vram";
        } else {
            $memory = ",vgamem_mb=$vga->{memory}";
        }
    } elsif ($qxlnum && $id) {
        $memory = ",ram_size=67108864,vram_size=33554432";
    }

    # EDID is switched off for the std VGA on Windows guests unless OVMF is used
    my $edidoff = "";
    if ($type eq 'VGA' && windows_version($conf->{ostype})) {
        $edidoff=",edid=off" if (!defined($conf->{bios}) || $conf->{bios} ne 'ovmf');
    }

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $vgaid = "vga" . ($id // '');
    my $pciaddr;
    if ($q35 && $vgaid eq 'vga') {
        # the first display uses pcie.0 bus on q35 machines
        $pciaddr = print_pcie_addr($vgaid, $bridges, $arch, $machine);
    } else {
        $pciaddr = print_pci_addr($vgaid, $bridges, $arch, $machine);
    }

    return "$type,id=${vgaid}${memory}${max_outputs}${pciaddr}${edidoff}";
}
1868
# Parses a ';'-separated list of numbers and ranges ('0-3;5') into an array of
# [start, end] pairs; end is undef for a single number. Dies on malformed
# parts and on ranges whose end is smaller than their start.
sub parse_number_sets {
    my ($set) = @_;

    my @ranges;
    for my $part (split(/;/, $set)) {
        my ($start, $end) = $part =~ /^\s*(\d+)(?:-(\d+))?\s*$/
            or die "invalid range: $part\n";
        die "invalid range: $part ($end < $start)\n" if defined($end) && $end < $start;
        push @ranges, [ $start, $end ];
    }
    return \@ranges;
}
1882
# Parses a 'numa<N>' property string; the 'cpus' and 'hostnodes' values, when
# present, are expanded into range lists via parse_number_sets().
sub parse_numa {
    my ($data) = @_;

    my $res = parse_property_string($numa_fmt, $data);
    for my $set_key (qw(cpus hostnodes)) {
        next if !defined($res->{$set_key});
        $res->{$set_key} = parse_number_sets($res->{$set_key});
    }
    return $res;
}
1891
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
#
# Parses a 'net<N>' property string. A parse error is only warned about and
# yields nothing. If no MAC address is configured, a random one is generated
# using the mac_prefix from datacenter.cfg.
sub parse_net {
    my ($data) = @_;

    my $res = eval { parse_property_string($net_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    $res->{macaddr} //= do {
        my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
        PVE::Tools::random_ether_addr($dc->{mac_prefix});
    };

    return $res;
}
1907
# ipconfigX ip=cidr,gw=ip,ip6=cidr,gw6=ip
#
# Parses an 'ipconfig<N>' property string. Parse errors and inconsistent
# gateway settings are warned about and yield nothing. If neither an IPv4 nor
# an IPv6 address is configured, 'dhcp' is returned for both.
sub parse_ipconfig {
    my ($data) = @_;

    my $res = eval { parse_property_string($ipconfig_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    # first matching consistency problem wins
    my $problem;
    if ($res->{gw} && !$res->{ip}) {
        $problem = 'gateway specified without specifying an IP address';
    } elsif ($res->{gw6} && !$res->{ip6}) {
        $problem = 'IPv6 gateway specified without specifying an IPv6 address';
    } elsif ($res->{gw} && $res->{ip} eq 'dhcp') {
        $problem = 'gateway specified together with DHCP';
    } elsif ($res->{gw6} && $res->{ip6} !~ /^$IPV6RE/) {
        # gw6 + auto/dhcp
        $problem = "IPv6 gateway specified together with $res->{ip6} address";
    }
    if (defined($problem)) {
        warn $problem;
        return;
    }

    return { ip => 'dhcp', ip6 => 'dhcp' } if !$res->{ip} && !$res->{ip6};

    return $res;
}
1942
# Serializes a parsed network device hash back into a 'net<N>' property string.
sub print_net {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $net_fmt);
}
1948
# Re-serializes every parsable 'net<N>' entry of $settings in place; since
# parse_net() fills in a missing MAC address, entries without one end up with
# a generated address. Unparsable entries are left untouched.
sub add_random_macs {
    my ($settings) = @_;

    for my $opt (grep { m/^net(\d+)$/ } keys %$settings) {
        my $net = parse_net($settings->{$opt}) or next;
        $settings->{$opt} = print_net($net);
    }
}
1959
# Returns 1 if the storage layer reports $vmid as owner of volume $volid.
# Absolute paths, lookup errors and foreign/unowned volumes yield nothing.
sub vm_is_volid_owner {
    my ($storecfg, $vmid, $volid) = @_;

    # absolute paths are not storage volumes, so they have no owner
    return if $volid =~ m|^/|;

    # lookup errors are deliberately ignored and treated as "not owned"
    my ($path, $owner) = eval { PVE::Storage::path($storecfg, $volid) };

    return 1 if $owner && ($owner == $vmid);
    return;
}
1973
# Called for a drive that is no longer referenced: cloud-init volumes are
# freed right away (errors only warn), CD-ROMs are ignored, and any other
# volume owned by this VM is recorded as unused in $conf.
sub vmconfig_register_unused_drive {
    my ($storecfg, $vmid, $conf, $drive) = @_;

    if (drive_is_cloudinit($drive)) {
        eval { PVE::Storage::vdisk_free($storecfg, $drive->{file}) };
        if (my $err = $@) {
            warn $err;
        }
    } elsif (!drive_is_cdrom($drive)) {
        my $volid = $drive->{file};
        PVE::QemuConfig->add_unused_volume($conf, $volid, $vmid)
            if vm_is_volid_owner($storecfg, $vmid, $volid);
    }
}
1987
# smbios: [manufacturer=str][,product=str][,version=str][,serial=str][,uuid=uuid][,sku=str][,family=str][,base64=bool]
my $smbios1_fmt = {
    uuid => {
        type => 'string',
        pattern => '[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}',
        format_description => 'UUID',
        description => "Set SMBIOS1 UUID.",
        optional => 1,
    },
    base64 => {
        type => 'boolean',
        description => 'Flag to indicate that the SMBIOS values are base64 encoded',
        optional => 1,
    },
};

# the free-text fields all share one schema and differ only in their description
for my $text_field (
    [ version => "Set SMBIOS1 version." ],
    [ serial => "Set SMBIOS1 serial number." ],
    [ manufacturer => "Set SMBIOS1 manufacturer." ],
    [ product => "Set SMBIOS1 product ID." ],
    [ sku => "Set SMBIOS1 SKU string." ],
    [ family => "Set SMBIOS1 family string." ],
) {
    my ($name, $description) = @$text_field;
    $smbios1_fmt->{$name} = {
        type => 'string',
        pattern => '[A-Za-z0-9+\/]+={0,2}',
        format_description => 'Base64 encoded string',
        description => $description,
        optional => 1,
    };
}
2045
# Parse a 'smbios1' property string.
#
# Parse errors are only warned about; in that case undef is returned.
sub parse_smbios1 {
    my ($data) = @_;

    my $parsed = eval { parse_property_string($smbios1_fmt, $data) };
    if (my $err = $@) {
        warn $err;
    }

    return $parsed;
}
2053
# Serialize a parsed smbios1 hash back into its property string form.
sub print_smbios1 {
    my ($smbios1) = @_;

    my $property_string = PVE::JSONSchema::print_property_string($smbios1, $smbios1_fmt);

    return $property_string;
}

# make the smbios1 schema available under the 'pve-qm-smbios1' format name
PVE::JSONSchema::register_format('pve-qm-smbios1', $smbios1_fmt);
2060
# Parse a 'watchdog' property string.
#
# Returns nothing for an empty/false value. Parse errors are only warned about,
# undef is returned in that case.
sub parse_watchdog {
    my ($value) = @_;

    return unless $value;

    my $parsed = eval { parse_property_string($watchdog_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $parsed;
}
2070
# Parse the 'agent' property of a VM config.
#
# Always returns a hash reference: it is empty if no agent is configured, if
# parsing failed (a warning is emitted) or if the agent is not enabled.
sub parse_guest_agent {
    my ($conf) = @_;

    my $agent_str = $conf->{agent};
    return {} if !defined($agent_str);

    my $res = eval { parse_property_string($agent_fmt, $agent_str) };
    warn $@ if $@;

    # if the agent is disabled ignore the other potentially set properties
    return $res->{enabled} ? $res : {};
}
2083
# Look up a single key of the parsed guest agent configuration.
#
# Returns undef if the config has no 'agent' entry at all.
sub get_qga_key {
    my ($conf, $key) = @_;

    # explicit undef (not an empty list) keeps the result shape for list-context callers
    return undef if !defined($conf->{agent});

    my $agent_conf = parse_guest_agent($conf);

    return $agent_conf->{$key};
}
2091
# Parse a 'vga' property string.
#
# Returns an empty hash for an empty/false value. Parse errors are only warned
# about, undef is returned in that case.
sub parse_vga {
    my ($value) = @_;

    return {} unless $value;

    my $parsed = eval { parse_property_string($vga_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $parsed;
}
2100
# Parse a 'rng' property string.
#
# Returns nothing for an empty/false value. Parse errors are only warned about,
# undef is returned in that case.
sub parse_rng {
    my ($value) = @_;

    return unless $value;

    my $parsed = eval { parse_property_string($rng_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $parsed;
}
2110
PVE::JSONSchema::register_format('pve-qm-usb-device', \&verify_usb_device);

# Format verifier for 'pve-qm-usb-device'.
#
# Returns $value if parse_usb_device() accepts it. Otherwise it dies, or
# returns nothing if $noerr is set.
sub verify_usb_device {
    my ($value, $noerr) = @_;

    if (!parse_usb_device($value)) {
        return if $noerr;
        die "unable to parse usb device\n";
    }

    return $value;
}
2121
# Add the JSON properties for the create and set functions to $prop.
#
# Every entry of $confdesc is copied, except for a few options that are skipped
# here. The passed hash is modified in place and returned.
sub json_config_properties {
    my $prop = shift;

    my %skip_opt = map { $_ => 1 } qw(parent snaptime vmstate runningmachine runningcpu);

    for my $opt (grep { !$skip_opt{$_} } keys %$confdesc) {
        $prop->{$opt} = $confdesc->{$opt};
    }

    return $prop;
}
2141
# Return a deep copy of $confdesc_cloudinit (used to generate documentation),
# so that callers can modify the result without touching the original.
sub cloudinit_config_properties {
    my $copy = dclone($confdesc_cloudinit);

    return $copy;
}
2147
# Validate $value against the type registered for $key in $confdesc.
#
# Returns the normalized value: 0/1 for booleans, an integer for integers, the
# unchanged value for numbers and format-checked strings, and the value with one
# pair of surrounding double quotes removed for plain strings.
# Dies on unknown keys, undefined values, embedded line breaks and type mismatches.
sub check_type {
    my ($key, $value) = @_;

    my $schema = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$schema;

    my $type = $schema->{type};

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    if ($type eq 'string') {
        my $fmt = $schema->{format};
        if ($fmt) {
            # values with a registered format are returned as is once they pass the check
            PVE::JSONSchema::check_format($fmt, $value);
            return $value;
        }
        $value =~ s/^\"(.*)\"$/$1/;
        return $value;
    }

    die "internal error"
}
2184
# Destroy a VM: free the volumes it owns and remove (or replace) its config.
#
# - $skiplock: do not check the config lock
# - $replacement_conf: if defined, written as new config instead of destroying it
# - $purge_unreferenced: also free all 'images' volumes listed for $vmid by the
#   storage layer, even if the config does not reference them
sub destroy_vm {
    my ($storecfg, $vmid, $skiplock, $replacement_conf, $purge_unreferenced) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    if (!$skiplock) {
        PVE::QemuConfig->check_lock($conf);
    }

    if ($conf->{template}) {
        # check if any base image is still used by a linked clone
        my $assert_base_unused = sub {
            my ($ds, $drive) = @_;

            return if drive_is_cdrom($drive);

            my $volid = $drive->{file};
            return if !$volid || $volid =~ m|^/|;

            if (PVE::Storage::volume_is_base_and_used($storecfg, $volid)) {
                die "base volume '$volid' is still in use by linked cloned\n";
            }
        };
        PVE::QemuConfig->foreach_volume_full($conf, { include_unused => 1 }, $assert_base_unused);
    }

    # volumes already handled, so that each one is only freed once
    my $seen_volids = {};

    my $remove_owned_drive = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;
        return if $seen_volids->{$volid};

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        $seen_volids->{$volid} = 1;
        eval { PVE::Storage::vdisk_free($storecfg, $volid) };
        if (my $err = $@) {
            warn "Could not remove disk '$volid', check manually: $err";
        }
    };

    # only remove disks owned by this VM (referenced in the config)
    my $include_opts = {
        include_unused => 1,
        extra_keys => ['vmstate'],
    };
    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $remove_owned_drive);

    # the saved VM state of snapshots
    for my $snap (values %{$conf->{snapshots}}) {
        my $vmstate = $snap->{vmstate};
        next if !defined($vmstate);

        my $drive = PVE::QemuConfig->parse_volume('vmstate', $vmstate, 1);
        next if !defined($drive);

        $remove_owned_drive->('vmstate', $drive);
    }

    PVE::QemuConfig->foreach_volume_full($conf->{pending}, $include_opts, $remove_owned_drive);

    if ($purge_unreferenced) { # also remove unreferenced disk
        my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid, undef, 'images');
        my $free_volume = sub {
            my ($volid, $sid, $volname, $d) = @_;

            eval { PVE::Storage::vdisk_free($storecfg, $volid) };
            warn $@ if $@;
        };
        PVE::Storage::foreach_volid($vmdisks, $free_volume);
    }

    if (defined($replacement_conf)) {
        PVE::QemuConfig->write_config($vmid, $replacement_conf);
    } else {
        PVE::QemuConfig->destroy_config($vmid);
    }
}
2255
# Parse the raw content of a VM config file.
#
# Returns nothing if $raw is undefined, otherwise a hash with the options of the
# main section plus the keys 'digest' (SHA1 of $raw), 'pending' (the [PENDING]
# section) and 'snapshots' (one hash per named section). Lines which cannot be
# parsed are skipped with a warning. Dies if the VMID cannot be derived from
# $filename.
sub parse_vm_config {
    my ($filename, $raw) = @_;

    return if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
        pending => {},
    };

    my ($vmid) = $filename =~ m|/qemu-server/(\d+)\.conf$|
        or die "got strange filename '$filename'";

    my $conf = $res; # the section which currently gets filled
    my $descr;
    my $section = '';

    # store the description collected so far in the current section and reset it
    my $flush_description = sub {
        return if !defined($descr);
        $descr =~ s/\s+$//;
        $conf->{description} = $descr;
        $descr = undef;
    };

    for my $line (split(/\n/, $raw)) {
        next if $line =~ m/^\s*$/;

        if ($line =~ m/^\[PENDING\]\s*$/i) {
            $flush_description->();
            $section = 'pending';
            $conf = $res->{$section} = {};
            next;
        }

        if ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            my $snapname = $1;
            $flush_description->();
            $section = $snapname;
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)\s*$/) {
            # comment lines make up the (multi-line) description
            $descr //= '';
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr //= '';
            $descr .= PVE::Tools::decode_text($2);
        # NOTE(review): unlike the other patterns this one is not anchored with '^' - confirm this is intended
        } elsif ($line =~ m/snapstate:\s*(prepare|delete)\s*$/) {
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            # stored without any type check
            my ($key, $value) = ($1, $2);
            $conf->{$key} = $value;
        } elsif ($line =~ m/^delete:\s*(.*\S)\s*$/) {
            my $value = $1;
            if ($section eq 'pending') {
                $conf->{delete} = $value; # we parse this later
            } else {
                warn "vm $vmid - propertry 'delete' is only allowed in [PENDING]\n";
            }
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(.+?)\s*$/) {
            my ($key, $value) = ($1, $2);

            eval { $value = check_type($key, $value); };
            if ($@) {
                warn "vm $vmid - unable to parse value of '$key' - $@";
                next;
            }

            $key = 'ide2' if $key eq 'cdrom';

            my $fmt = $confdesc->{$key}->{format};
            if ($fmt && $fmt =~ /^pve-qm-(?:ide|scsi|virtio|sata)$/) {
                # re-print the drive with the file converted by filename_to_volume_id()
                my $drive = parse_drive($key, $value);
                my $volid = filename_to_volume_id($vmid, $drive->{file}, $drive->{media});
                if (!$volid) {
                    warn "vm $vmid - unable to parse value of '$key'\n";
                    next;
                }
                $drive->{file} = $volid;
                $value = print_drive($drive);
            }

            $conf->{$key} = $value;
        } else {
            warn "vm $vmid - unable to parse config: $line\n";
        }
    }

    $flush_description->();

    delete $res->{snapstate}; # just to be sure

    return $res;
}
2358
# Generate the raw config file content for $conf.
#
# The passed config is normalized in place first ('cdrom' becomes 'ide2', 'smp'
# becomes 'sockets', all values are run through check_type() and 'unusedX'
# entries of volumes which are in use again get dropped). Dies if a value does
# not pass the type check. Returns the raw text; nothing is written here.
sub write_vm_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    if (my $cdrom = $conf->{cdrom}) {
        die "option ide2 conflicts with cdrom\n" if $conf->{ide2};
        $conf->{ide2} = $cdrom;
        delete $conf->{cdrom};
    }

    # we do not use 'smp' any longer
    if ($conf->{sockets}) {
        delete $conf->{smp};
    } elsif (my $smp = $conf->{smp}) {
        $conf->{sockets} = $smp;
        delete @$conf{qw(cores smp)};
    }

    my $used_volids = {};

    # keys which are not options and thus must not be type checked
    my %is_meta_key = map { $_ => 1 } qw(digest description snapshots snapstate pending);

    # type check and normalize all options of one section
    my $cleanup_config = sub {
        my ($cref, $pending, $snapname) = @_;

        for my $key (keys %$cref) {
            next if $is_meta_key{$key};

            if ($key eq 'delete') {
                die "propertry 'delete' is only allowed in [PENDING]\n"
                    if !$pending;
                # fixme: check syntax?
                next;
            }

            my $value = $cref->{$key};
            eval { $value = check_type($key, $value); };
            die "unable to parse value of '$key' - $@" if $@;

            $cref->{$key} = $value;

            # remember the volumes used by drives (not done for snapshot sections)
            next if $snapname || !is_valid_drivename($key);
            my $drive = parse_drive($key, $value);
            $used_volids->{$drive->{file}} = 1 if $drive && $drive->{file};
        }
    };

    $cleanup_config->($conf);

    $cleanup_config->($conf->{pending}, 1);

    for my $snapname (keys %{$conf->{snapshots}}) {
        die "internal error: snapshot name '$snapname' is forbidden" if lc($snapname) eq 'pending';
        $cleanup_config->($conf->{snapshots}->{$snapname}, undef, $snapname);
    }

    # remove 'unusedX' settings if we re-add a volume
    for my $key (grep { m/^unused/ } keys %$conf) {
        delete $conf->{$key} if $used_volids->{$conf->{$key}};
    }

    # render one section as text
    my $generate_raw_config = sub {
        my ($section_conf, $pending) = @_;

        my $text = '';

        # add description as comment to top of file
        my $descr = $section_conf->{description};
        if (defined($descr)) {
            if ($descr) {
                $text .= join('', map { '#' . PVE::Tools::encode_text($_) . "\n" } split(/\n/, $descr));
            } elsif ($pending) {
                $text .= "#\n";
            }
        }

        for my $key (sort keys %$section_conf) {
            next if $key =~ /^(digest|description|pending|snapshots)$/;
            $text .= "$key: $section_conf->{$key}\n";
        }

        return $text;
    };

    my $raw = $generate_raw_config->($conf);

    if (scalar(keys %{$conf->{pending}})) {
        $raw .= "\n[PENDING]\n" . $generate_raw_config->($conf->{pending}, 1);
    }

    for my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n" . $generate_raw_config->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
2460
# Return a hash with the default value of each option that declares one.
#
# We use the static defaults from our JSON schema configuration ($confdesc).
sub load_defaults {
    my %defaults =
        map { $_ => $confdesc->{$_}->{default} }
        grep { defined($confdesc->{$_}->{default}) }
        keys %$confdesc;

    return \%defaults;
}
2474
# Return a hash { $vmid => { exists => 1 } } with all guests from the cluster
# VM list which are of type 'qemu' and located on the local node.
sub config_list {
    my $vmlist = PVE::Cluster::get_vmlist();

    my $ids = $vmlist ? $vmlist->{ids} : undef;
    return {} if !$ids;

    my $nodename = nodename();

    my $res = {};
    for my $vmid (keys %$ids) {
        my $entry = $ids->{$vmid};
        my $node = $entry->{node};
        next if !$node || $node ne $nodename;
        my $type = $entry->{type};
        next if !$type || $type ne 'qemu';

        $res->{$vmid}->{exists} = 1;
    }

    return $res;
}
2490
# Test if the VM uses local resources (to prevent migration).
#
# Returns a reference to the list of config keys which refer to local resources.
# Dies if there are any, unless $noerr is set.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    # 'hostusb' and 'hostpci' without index are the old syntax
    my @loc_res = grep { $conf->{$_} } qw(hostusb hostpci ivshmem);

    for my $k (keys %$conf) {
        next if $k !~ m/^(usb|hostpci|serial|parallel)\d+$/;

        # spice USB devices are not bound to this host
        next if $k =~ m/^usb/ && ($conf->{$k} =~ m/^spice(?![^,])/);
        # sockets are safe: they will be recreated on the target side post-migrate
        next if $k =~ m/^serial/ && ($conf->{$k} eq 'socket');

        push @loc_res, $k;
    }

    if (@loc_res && !$noerr) {
        die "VM uses local resources\n";
    }

    return \@loc_res;
}
2513
# Check if the storages used by the VM are available on all nodes (used by migrate).
#
# For each volume with a storage ID, the storage must be enabled locally and on
# $node, and the volume's content type must be configured on that storage.
# Dies otherwise.
sub check_storage_availability {
    my ($storecfg, $conf, $node) = @_;

    my $check_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        # check if storage is available on both nodes
        my $scfg = PVE::Storage::storage_check_enabled($storecfg, $sid);
        PVE::Storage::storage_check_enabled($storecfg, $sid, $node);

        my ($vtype) = PVE::Storage::parse_volname($storecfg, $volid);

        if (!$scfg->{content}->{$vtype}) {
            die "$volid: content type '$vtype' is not available on storage '$sid'\n";
        }
    };

    PVE::QemuConfig->foreach_volume($conf, $check_volume);
}
2537
# List the nodes where all VM images are available (used by has_feature API).
#
# Starts with all cluster nodes and removes those which cannot access one of the
# volumes' storages. Returns a hash { $node => 1 }.
sub shared_nodes {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => 1 } @$nodelist };
    my $nodename = nodename();

    my $restrict_nodes = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        if ($scfg->{disable}) {
            # a disabled storage is available nowhere
            $nodehash = {};
        } elsif (my $avail = $scfg->{nodes}) {
            delete @$nodehash{ grep { !$avail->{$_} } keys %$nodehash };
        } elsif (!$scfg->{shared}) {
            # not shared and not limited to certain nodes: only the local node remains
            delete @$nodehash{ grep { $_ ne $nodename } keys %$nodehash };
        }

        return;
    };

    PVE::QemuConfig->foreach_volume($conf, $restrict_nodes);

    return $nodehash
}
2571
# Determine for each cluster node which storages used by the VM are not
# available there.
#
# Returns a hash { $node => {} }, where nodes with unavailable storages get an
# 'unavailable_storages' entry holding the sorted list of those storage IDs.
sub check_local_storage_availability {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => {} } @$nodelist };

    my $collect_unavailable = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        my @affected_nodes;
        if ($scfg->{disable}) {
            @affected_nodes = keys %$nodehash;
        } elsif (my $avail = $scfg->{nodes}) {
            @affected_nodes = grep { !$avail->{$_} } keys %$nodehash;
        }

        for my $node (@affected_nodes) {
            $nodehash->{$node}->{unavailable_storages}->{$storeid} = 1;
        }

        return;
    };

    PVE::QemuConfig->foreach_volume($conf, $collect_unavailable);

    # convert the collected storage ID sets to sorted lists
    for my $node_info (values %$nodehash) {
        my $unavail = $node_info->{unavailable_storages};
        next if !$unavail;
        $node_info->{unavailable_storages} = [ sort keys %$unavail ];
    }

    return $nodehash
}
2610
# Compat only, use assert_config_exists_on_node and vm_running_locally where possible.
#
# Unless $nocheck is set, it is asserted first that the config exists on $node.
# Returns whatever vm_running_locally() reports for $vmid.
sub check_running {
    my ($vmid, $nocheck, $node) = @_;

    if (!$nocheck) {
        PVE::QemuConfig::assert_config_exists_on_node($vmid, $node);
    }

    return PVE::QemuServer::Helpers::vm_running_locally($vmid);
}
2618
# Return the list from config_list(), with the 'pid' added for each VM which has
# a pid file in the run directory and is reported as running by check_running().
sub vzlist {
    my $vzlist = config_list();

    my $run_dir = IO::Dir->new($PVE::QemuServer::Helpers::var_run_tmpdir);
    return $vzlist if !$run_dir;

    while (defined(my $entry = $run_dir->read)) {
        my ($vmid) = $entry =~ m/^(\d+)\.pid$/;
        next if !defined($vmid) || !defined($vzlist->{$vmid});

        my $pid = check_running($vmid);
        $vzlist->{$vmid}->{pid} = $pid if $pid;
    }

    return $vzlist;
}
2636
# Schema of the properties describing the status of a single VM.
our $vmstatus_return_properties = {
    vmid => get_standard_option('pve-vmid'),
    status => {
        type => 'string',
        enum => [qw(stopped running)],
        description => 'Qemu process status.',
    },
    maxmem => {
        type => 'integer',
        renderer => 'bytes',
        optional => 1,
        description => 'Maximum memory in bytes.',
    },
    maxdisk => {
        type => 'integer',
        renderer => 'bytes',
        optional => 1,
        description => 'Root disk size in bytes.',
    },
    name => {
        type => 'string',
        optional => 1,
        description => 'VM name.',
    },
    qmpstatus => {
        type => 'string',
        optional => 1,
        description => 'Qemu QMP agent status.',
    },
    pid => {
        type => 'integer',
        optional => 1,
        description => 'PID of running qemu process.',
    },
    uptime => {
        type => 'integer',
        renderer => 'duration',
        optional => 1,
        description => 'Uptime.',
    },
    cpus => {
        type => 'number',
        optional => 1,
        description => 'Maximum usable CPUs.',
    },
    lock => {
        type => 'string',
        optional => 1,
        description => 'The current config lock, if any.',
    },
    tags => {
        type => 'string',
        optional => 1,
        description => 'The current configured tags, if any',
    },
    'running-machine' => {
        type => 'string',
        optional => 1,
        description => 'The currently running machine type (if running).',
    },
    'running-qemu' => {
        type => 'string',
        optional => 1,
        description => 'The currently running QEMU version (if running).',
    },
};
2703
# CPU accounting data of the previous vmstatus() call, indexed by PID
my $last_proc_pid_stat;

# Get VM status information for all local VMs, or only for $opt_vmid if given.
#
# This must be fast and should not block ($full == false).
# We only query KVM using QMP if $full == true (this can be slow).
#
# Returns a hash { $vmid => $status }.
sub vmstatus {
    my ($opt_vmid, $full) = @_;

    my $res = {};

    my $storecfg = PVE::Storage::config();

    my $list = vzlist();
    my $defaults = load_defaults();

    my ($uptime) = PVE::ProcFSTools::read_proc_uptime(1);

    my $cpucount = $cpuinfo->{cpus} || 1;
    my $mib = 1024 * 1024;

    # true if the VM is part of the requested result
    my $is_selected = sub {
        my ($vmid) = @_;
        return !$opt_vmid || $vmid eq $opt_vmid;
    };

    # step 1: values derived from the config and the pid list
    for my $vmid (keys %$list) {
        next if !$is_selected->($vmid);

        my $conf = PVE::QemuConfig->load_config($vmid);
        my $pid = $list->{$vmid}->{pid};

        my $d = { vmid => int($vmid) };
        $d->{pid} = int($pid) if $pid;

        # fixme: better status?
        $d->{status} = $pid ? 'running' : 'stopped';

        my $size = PVE::QemuServer::Drive::bootdisk_size($storecfg, $conf);
        $d->{disk} = 0; # no info available
        $d->{maxdisk} = $size // 0;

        my $sockets = $conf->{sockets} || $defaults->{sockets};
        my $cores = $conf->{cores} || $defaults->{cores};
        $d->{cpus} = $sockets * $cores;
        $d->{cpus} = $cpucount if $d->{cpus} > $cpucount;
        $d->{cpus} = $conf->{vcpus} if $conf->{vcpus};

        $d->{name} = $conf->{name} || "VM $vmid";
        $d->{maxmem} = ($conf->{memory} || $defaults->{memory}) * $mib;

        if (my $balloon = $conf->{balloon}) {
            $d->{balloon_min} = $balloon * $mib;
            $d->{shares} = $conf->{shares} // $defaults->{shares};
        }

        # runtime counters start at zero and get filled in below (if available)
        $d->{$_} = 0 for qw(uptime cpu mem netout netin diskread diskwrite);

        $d->{template} = 1 if PVE::QemuConfig->is_template($conf);

        $d->{serial} = 1 if conf_has_serial($conf);
        $d->{lock} = $conf->{lock} if $conf->{lock};
        $d->{tags} = $conf->{tags} if defined($conf->{tags});

        $res->{$vmid} = $d;
    }

    # step 2: network counters of the tap devices
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    for my $dev (keys %$netdev) {
        my ($vmid) = $dev =~ m/^tap([1-9]\d*)i/;
        next if !defined($vmid);

        my $d = $res->{$vmid};
        next if !$d;

        my $counters = $netdev->{$dev};

        # what the tap device receives is accounted as outgoing VM traffic and vice versa
        $d->{netout} += $counters->{receive};
        $d->{netin} += $counters->{transmit};

        if ($full) {
            $d->{nics}->{$dev}->{netout} = int($counters->{receive});
            $d->{nics}->{$dev}->{netin} = int($counters->{transmit});
        }
    }

    # step 3: uptime, memory and CPU usage from /proc
    my $ctime = gettimeofday;

    for my $vmid (keys %$list) {
        my $d = $res->{$vmid};
        next if !$d;

        my $pid = $d->{pid};
        next if !$pid;

        my $pstat = PVE::ProcFSTools::read_proc_pid_stat($pid);
        next if !$pstat; # not running

        my $used = $pstat->{utime} + $pstat->{stime};

        $d->{uptime} = int(($uptime - $pstat->{starttime}) / $cpuinfo->{user_hz});

        if ($pstat->{vsize}) {
            $d->{mem} = int(($pstat->{rss} / $pstat->{vsize}) * $d->{maxmem});
        }

        my $old = $last_proc_pid_stat->{$pid};
        if (!$old) {
            # first time we see this PID, there is nothing to compare with yet
            $last_proc_pid_stat->{$pid} = { time => $ctime, used => $used, cpu => 0 };
            next;
        }

        my $dtime = ($ctime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        if ($dtime <= 1000) {
            # too little time passed, keep reporting the previous value
            $d->{cpu} = $old->{cpu};
            next;
        }

        my $dutime = $used - $old->{used};

        $d->{cpu} = (($dutime / $dtime) * $cpucount) / $d->{cpus};
        $last_proc_pid_stat->{$pid} = { time => $ctime, used => $used, cpu => $d->{cpu} };
    }

    return $res if !$full;

    # step 4: query the running VMs using QMP
    my $qmpclient = PVE::QMPClient->new();

    my $ballooncb = sub {
        my ($vmid, $resp) = @_;

        my $info = $resp->{'return'};
        return if !$info->{max_mem};

        my $d = $res->{$vmid};

        # use memory assigned to VM
        $d->{maxmem} = $info->{max_mem};
        $d->{balloon} = $info->{actual};

        if (defined($info->{total_mem}) && defined($info->{free_mem})) {
            $d->{mem} = $info->{total_mem} - $info->{free_mem};
            $d->{freemem} = $info->{free_mem};
        }

        $d->{ballooninfo} = $info;
    };

    my $blockstatscb = sub {
        my ($vmid, $resp) = @_;

        my $data = $resp->{'return'} || [];
        my ($total_read, $total_written) = (0, 0);

        for my $blockstat (@$data) {
            my $stats = $blockstat->{stats};
            $total_read += $stats->{rd_bytes};
            $total_written += $stats->{wr_bytes};

            $blockstat->{device} =~ s/drive-//;
            $res->{$vmid}->{blockstat}->{$blockstat->{device}} = $stats;
        }

        $res->{$vmid}->{diskread} = $total_read;
        $res->{$vmid}->{diskwrite} = $total_written;
    };

    my $machinecb = sub {
        my ($vmid, $resp) = @_;

        my $data = $resp->{'return'} || [];

        $res->{$vmid}->{'running-machine'} =
            PVE::QemuServer::Machine::current_from_query_machines($data);
    };

    my $versioncb = sub {
        my ($vmid, $resp) = @_;

        my $data = $resp->{'return'} // {};

        my $version = 'unknown';
        if (my $v = $data->{qemu}) {
            $version = join('.', $v->{major}, $v->{minor}, $v->{micro});
        }

        $res->{$vmid}->{'running-qemu'} = $version;
    };

    my $statuscb = sub {
        my ($vmid, $resp) = @_;

        $qmpclient->queue_cmd($vmid, $blockstatscb, 'query-blockstats');
        $qmpclient->queue_cmd($vmid, $machinecb, 'query-machines');
        $qmpclient->queue_cmd($vmid, $versioncb, 'query-version');
        # this fails if the balloon driver is not loaded, so this must be
        # the last command (following commands are aborted if this fails).
        $qmpclient->queue_cmd($vmid, $ballooncb, 'query-balloon');

        my $status = $resp->{'return'}->{status};
        if (!defined($status)) {
            warn "unable to get VM status\n";
            return;
        }

        $res->{$vmid}->{qmpstatus} = $status;
    };

    for my $vmid (keys %$list) {
        next if !$is_selected->($vmid);
        next if !$res->{$vmid}->{pid}; # not running
        $qmpclient->queue_cmd($vmid, $statuscb, 'query-status');
    }

    $qmpclient->queue_execute(undef, 2);

    for my $vmid (keys %$list) {
        next if !$is_selected->($vmid);
        next if !$res->{$vmid}->{pid}; # not running

        # we can't use the $qmpclient since it might have already aborted on
        # 'query-balloon', but this might also fail for older versions...
        my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };
        $res->{$vmid}->{'proxmox-support'} = $qemu_support // {};
    }

    # fall back to the process status where QMP did not provide one
    for my $vmid (keys %$list) {
        next if !$is_selected->($vmid);
        $res->{$vmid}->{qmpstatus} ||= $res->{$vmid}->{status};
    }

    return $res;
}
2944
# Return 1 if any of the 'serialN' options (N < $MAX_SERIAL_PORTS) is set in
# $conf, 0 otherwise.
sub conf_has_serial {
    my ($conf) = @_;

    for my $i (0 .. $MAX_SERIAL_PORTS - 1) {
        return 1 if $conf->{"serial$i"};
    }

    return 0;
}
2956
# Return a description of audio device $id (default 0) if the config has one,
# nothing otherwise.
#
# The result holds the device ('dev'), its ID ('dev_id'), the backend driver
# ('backend', 'spice' if none is configured) and the backend ID ('backend_id').
sub conf_has_audio {
    my ($conf, $id) = @_;

    $id = 0 if !defined($id);

    my $audio = $conf->{"audio$id"};
    return if !defined($audio);

    my $props = parse_property_string($audio_fmt, $audio);
    my $backend = defined($props->{driver}) ? $props->{driver} : 'spice';

    return {
        dev => $props->{device},
        dev_id => "audiodev$id",
        backend => $backend,
        backend_id => "$backend-backend${id}",
    };
}
2974
# Build the QEMU command line arguments for an audio device.
#
# $audio is a hash as returned by conf_has_audio(). The device gets connected to
# the audio backend explicitly if the machine version is at least 4.2.
# Returns an array reference; dies on unknown device types.
sub audio_devs {
    my ($audio, $audiopciaddr, $machine_version) = @_;

    my $id = $audio->{dev_id};
    my $dev = $audio->{dev};
    my $backend_id = $audio->{backend_id};

    my $audiodev = min_version($machine_version, 4, 2) ? ",audiodev=$backend_id" : "";

    my @devs;
    if ($dev eq 'AC97') {
        @devs = ('-device', "AC97,id=${id}${audiopciaddr}$audiodev");
    } elsif ($dev =~ /intel\-hda$/) {
        # the HDA controller itself plus two codecs attached to its bus
        @devs = (
            '-device', "$dev,id=${id}${audiopciaddr}",
            '-device', "hda-micro,id=${id}-codec0,bus=${id}.0,cad=0$audiodev",
            '-device', "hda-duplex,id=${id}-codec1,bus=${id}.0,cad=1$audiodev",
        );
    } else {
        die "unkown audio device '$dev', implement me!";
    }

    push @devs, '-audiodev', "$audio->{backend},id=$backend_id";

    return \@devs;
}
3000
# Return the paths of the control socket ('socket') and the pid file ('pid')
# of the swtpm instance of a VM.
sub get_tpm_paths {
    my ($vmid) = @_;

    my $base = "/var/run/qemu-server/$vmid.swtpm";

    return {
        socket => $base,
        pid => "$base.pid",
    };
}
3008
# Append the QEMU arguments for the emulated TPM to @$devices, if the config has
# a 'tpmstate0' entry. The TPM is connected via the swtpm control socket.
sub add_tpm_device {
    my ($vmid, $devices, $conf) = @_;

    return if !$conf->{tpmstate0};

    my $socket = get_tpm_paths($vmid)->{socket};

    push @$devices,
        "-chardev", "socket,id=tpmchar,path=$socket",
        "-tpmdev", "emulator,id=tpmdev,chardev=tpmchar",
        "-device", "tpm-tis,tpmdev=tpmdev";
}
3020
# Start the swtpm daemon serving the TPM state of a VM.
#
# - $tpmdrive: raw value of the 'tpmstate0' option, nothing is done if it is unset
# - $migration: if set, swtpm_setup is skipped, as the state comes from remote
#
# Dies if one of the commands fails or if the pid file does not show up in time.
# Returns the PID read from the pid file, so that the daemon can be killed on
# error, or undef (with a warning) if the pid file contains no PID.
sub start_swtpm {
    my ($storecfg, $vmid, $tpmdrive, $migration) = @_;

    return if !$tpmdrive;

    my $state;
    my $tpm = parse_drive("tpmstate0", $tpmdrive);
    my ($storeid, $volname) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
    if ($storeid) {
        $state = PVE::Storage::map_volume($storecfg, $tpm->{file});
    } else {
        $state = $tpm->{file};
    }

    my $paths = get_tpm_paths($vmid);

    # guard against an unset version to avoid 'uninitialized' warnings
    my $is_v2 = ($tpm->{version} // '') eq 'v2.0';

    # during migration, we will get state from remote
    if (!$migration) {
        # run swtpm_setup to create a new TPM state if it doesn't exist yet
        my $setup_cmd = [
            "swtpm_setup",
            "--tpmstate",
            "file://$state",
            "--createek",
            "--create-ek-cert",
            "--create-platform-cert",
            "--lock-nvram",
            "--config",
            "/etc/swtpm_setup.conf", # do not use XDG configs
            "--runas",
            "0", # force creation as root, error if not possible
            "--not-overwrite", # ignore existing state, do not modify
        ];

        # TPM 2.0 supports ECC crypto, use if possible
        push @$setup_cmd, "--tpm2", "--ecc" if $is_v2;

        # use the line passed to the callback, not a capture variable left
        # over from whatever regex happened to run last
        run_command($setup_cmd, outfunc => sub {
            my ($line) = @_;
            print "swtpm_setup: $line\n";
        });
    }

    my $emulator_cmd = [
        "swtpm",
        "socket",
        "--tpmstate",
        "backend-uri=file://$state,mode=0600",
        "--ctrl",
        "type=unixio,path=$paths->{socket},mode=0600",
        "--pid",
        "file=$paths->{pid}",
        "--terminate", # terminate on QEMU disconnect
        "--daemon",
    ];
    push @$emulator_cmd, "--tpm2" if $is_v2;
    run_command($emulator_cmd, outfunc => sub {
        my ($line) = @_;
        print "$line\n";
    });

    my $tries = 100; # swtpm may take a bit to start before daemonizing, wait up to 5s for pid
    while (! -e $paths->{pid}) {
        die "failed to start swtpm: pid file '$paths->{pid}' wasn't created.\n" if --$tries == 0;
        usleep(50_000);
    }

    # return untainted PID of swtpm daemon so it can be killed on error - only
    # trust the capture if the match actually succeeded
    my $pid;
    my $pid_line = file_read_firstline($paths->{pid}) // '';
    if ($pid_line =~ m/(\d+)/) {
        $pid = $1;
    } else {
        warn "could not read PID of swtpm from '$paths->{pid}'\n";
    }

    return $pid;
}
3090
# Check if a 'vga' property string selects a SPICE capable (qxl) display.
#
# Returns 0 if not, otherwise the digit following 'qxl' in the type (2, 3 or 4),
# or 1 for plain 'qxl'.
sub vga_conf_has_spice {
    my ($vga) = @_;

    my $vgaconf = parse_vga($vga);
    my $vgatype = $vgaconf->{type};

    if ($vgatype && $vgatype =~ m/^qxl([234])?$/) {
        return $1 || 1;
    }

    return 0;
}
3100
# Return true if $arch is the architecture of the host.
sub is_native($) {
    my ($arch) = @_;

    my $host_arch = get_host_arch();

    return $host_arch eq $arch;
}
3105
# Return the architecture of a VM: the configured 'arch', or the host
# architecture if none is configured.
sub get_vm_arch {
    my ($conf) = @_;

    my $arch = $conf->{arch};

    return defined($arch) ? $arch : get_host_arch();
}
3110
# machine type used for an architecture if the config does not specify one
my $default_machines = {
    x86_64 => 'pc',
    aarch64 => 'virt',
};

# Return the 'major.minor' part of a QEMU version string.
#
# $kvmversion defaults to the result of kvm_user_version(). Returns undef if the
# version does not start with 'major.minor', instead of leaking a stale regex
# capture from an earlier, unrelated match.
sub get_installed_machine_version {
    my ($kvmversion) = @_;

    $kvmversion = kvm_user_version() if !defined($kvmversion);

    my ($machine_version) = $kvmversion =~ m/^(\d+\.\d+)/;

    return $machine_version;
}
3122
# Return a version-pinned variant of an unversioned machine type.
#
# $base_version is used for pinning if it is defined and can_run_pve_machine_version()
# accepts it for $kvmversion, the installed machine version is used otherwise.
# Unknown machine types are returned unchanged (with a warning).
sub windows_get_pinned_machine_version {
    my ($machine, $base_version, $kvmversion) = @_;

    my $base_usable = defined($base_version)
        && PVE::QemuServer::Machine::can_run_pve_machine_version($base_version, $kvmversion);
    my $pin_version = $base_usable ? $base_version : get_installed_machine_version($kvmversion);

    my %versioned_prefix = (
        pc => 'pc-i440fx',
        q35 => 'pc-q35',
        virt => 'virt',
    );

    # no machine type at all is handled like 'pc'
    my $prefix = $versioned_prefix{ $machine || 'pc' };
    return "$prefix-$pin_version" if $prefix;

    warn "unknown machine type '$machine', not touching that!\n";

    return $machine;
}
3144
# Determine the machine type to use for a VM.
#
# $forcemachine takes precedence over the configured machine. Unversioned types
# (none, 'pc', 'q35', 'virt') are pinned for Windows guests and fall back to the
# default of $arch (x86_64 if undefined). With $add_pve_version set, the result
# always carries a '+pveN' suffix.
sub get_vm_machine {
    my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_;

    my $machine = $forcemachine || $conf->{machine};

    my $is_unversioned = !$machine || $machine =~ m/^(?:pc|q35|virt)$/;
    if ($is_unversioned) {
        $kvmversion //= kvm_user_version();

        # we must pin Windows VMs without a specific version to 5.1, as 5.2 fixed a bug in ACPI
        # layout which confuses windows quite a bit and may result in various regressions..
        # see: https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg08484.html
        if (windows_version($conf->{ostype})) {
            $machine = windows_get_pinned_machine_version($machine, '5.1', $kvmversion);
        }

        $arch //= 'x86_64';
        $machine ||= $default_machines->{$arch};

        if ($add_pve_version) {
            $machine .= '+pve' . PVE::QemuServer::Machine::get_pve_version($kvmversion);
        }
    }

    # nothing left to do if no pve-version is wanted or if there is one already
    return $machine if !$add_pve_version || $machine =~ m/\+pve\d+?(?:\.pxe)?$/;

    # a '.pxe' suffix has to stay at the very end
    my $pxe_suffix = '';
    if ($machine =~ m/^(.*?)\.pxe$/) {
        $machine = $1;
        $pxe_suffix = '.pxe';
    }

    # for version-pinned machines that do not include a pve-version (e.g.
    # pc-q35-4.1), we assume 0 to keep them stable in case we bump
    return $machine . '+pve0' . $pxe_suffix;
}
3179
# Looks up the OVMF firmware image set for a guest.
#
# Parameters:
#   $arch    - guest architecture, must be a key of the $OVMF table
#   $efidisk - parsed efidisk drive (may be undef); 'efitype' and
#              'pre-enrolled-keys' are read
#   $smm     - true to select the SMM variant of the 4m images
#
# Returns the elements of the selected $OVMF entry as a list.
# Dies if no images are known for $arch.
sub get_ovmf_files($$$) {
    my ($arch, $efidisk, $smm) = @_;

    my $types = $OVMF->{$arch};
    die "no OVMF images known for architecture '$arch'\n" if !$types;

    my $type = 'default';
    if (defined($efidisk->{efitype}) && $efidisk->{efitype} eq '4m') {
        my $base = $smm ? "4m" : "4m-no-smm";
        $type = $efidisk->{'pre-enrolled-keys'} ? "$base-ms" : $base;
    }

    return @{ $types->{$type} };
}
3194
# Emulator binaries for guests whose architecture differs from the host's.
my $Arch2Qemu = {
    x86_64 => '/usr/bin/qemu-system-x86_64',
    aarch64 => '/usr/bin/qemu-system-aarch64',
};

# Returns the QEMU binary to run guests of architecture $arch with:
# '/usr/bin/kvm' if $arch is native to the host, otherwise the matching
# emulator from $Arch2Qemu. Dies for architectures without a known emulator.
sub get_command_for_arch($) {
    my ($arch) = @_;

    if (!is_native($arch)) {
        my $emulator = $Arch2Qemu->{$arch};
        die "don't know how to emulate architecture '$arch'\n" if !$emulator;
        return $emulator;
    }

    return '/usr/bin/kvm';
}
3207
# To turn the results of query_supported_cpu_flags and
# query_understood_cpu_flags into flags usable on a QEMU command line (-cpu
# element), array_intersect the result of query_supported_ with
# query_understood_ first. This is necessary because:
#
# a) query_understood_ returns flags the host cannot use and
# b) query_supported_ (rather the QMP call) doesn't actually return CPU
#    flags, but CPU settings - with most of them being flags. Those settings
#    (and some flags, curiously) cannot be specified as a "-cpu" argument.
#
# query_supported_ needs to start up to 2 temporary VMs and is therefore rather
# expensive. If you need the value returned from this, you can get it much
# cheaper from pmxcfs using PVE::Cluster::get_node_kv('cpuflags-$accel') with
# $accel being 'kvm' or 'tcg'.
#
# pvestatd calls this function on startup and whenever the QEMU/KVM version
# changes, automatically populating pmxcfs.
#
# Parameters:
#   $arch - architecture to query, defaults to the host architecture
#
# Returns: { kvm => [ flagX, flagY, ... ], tcg => [ flag1, flag2, ... ] }
# since kvm and tcg machines support different flags. The 'kvm' entry is only
# set if kvm_version() is defined; an entry is undef if its query failed.
#
sub query_supported_cpu_flags {
    my ($arch) = @_;

    $arch //= get_host_arch();
    my $default_machine = $default_machines->{$arch};

    # FIXME: Once this is merged, the code below should work for ARM as well:
    # https://lists.nongnu.org/archive/html/qemu-devel/2019-06/msg04947.html
    if ($arch eq "aarch64") {
        die "QEMU/KVM cannot detect CPU flags on ARM (aarch64)\n";
    }

    my $kvm_supported = defined(kvm_version());
    my $qemu_cmd = get_command_for_arch($arch);
    my $fakevmid = -1;
    my $pidfile = PVE::QemuServer::Helpers::pidfile_name($fakevmid);

    # Start a temporary (frozen) VM with vmid -1 to allow sending a QMP command
    my $query_supported_run_qemu = sub {
        my ($kvm) = @_;

        my @qemu_args = (
            $qemu_cmd,
            '-machine', $default_machine,
            '-display', 'none',
            '-chardev', "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server=on,wait=off",
            '-mon', 'chardev=qmp,mode=control',
            '-pidfile', $pidfile,
            '-S', '-daemonize',
        );
        push @qemu_args, '-accel', 'tcg' if !$kvm;

        my $exit_code = run_command(\@qemu_args, noerr => 1, quiet => 0);
        die "QEMU flag querying VM exited with code " . $exit_code if $exit_code;

        my %enabled;
        eval {
            my $expansion = mon_cmd(
                $fakevmid,
                'query-cpu-model-expansion',
                type => 'full',
                model => { name => 'host' }
            );

            my $props = $expansion->{model}->{props};
            for my $name (keys %$props) {
                next if $props->{$name} ne '1';
                # QEMU returns some flags multiple times, with '_', '.' or '-'
                # (e.g. lahf_lm and lahf-lm; sse4.2, sse4-2 and sse4_2; ...).
                # We only keep those with underscores, to match /proc/cpuinfo
                (my $normalized = $name) =~ s/\.|-/_/g;
                $enabled{$normalized} = 1;
            }
        };
        my $err = $@;

        # force stop with 10 sec timeout and 'nocheck', always stop, even if QMP failed
        vm_stop(undef, $fakevmid, 1, 1, 10, 0, 1);

        die $err if $err;

        return [ sort keys %enabled ];
    };

    my $flags = {};

    # We need to query QEMU twice, since KVM and TCG have different supported flags
    PVE::QemuConfig->lock_config($fakevmid, sub {
        $flags->{tcg} = eval { $query_supported_run_qemu->(0) };
        warn "warning: failed querying supported tcg flags: $@\n" if $@;

        if ($kvm_supported) {
            $flags->{kvm} = eval { $query_supported_run_qemu->(1) };
            warn "warning: failed querying supported kvm flags: $@\n" if $@;
        }
    });

    return $flags;
}
3309
# Understood CPU flags are written to a file at 'pve-qemu' compile time
my $understood_cpu_flag_dir = "/usr/share/kvm";

# Reads the whitespace separated list of CPU flags from the
# 'recognized-CPUID-flags-<host arch>' file in $understood_cpu_flag_dir.
#
# Returns a reference to the array of flags. Dies if the file for the host
# architecture does not exist.
sub query_understood_cpu_flags {
    my $arch = get_host_arch();
    my $filepath = "$understood_cpu_flag_dir/recognized-CPUID-flags-$arch";

    if (!-e $filepath) {
        die "Cannot query understood QEMU CPU flags for architecture: $arch (file not found)\n";
    }

    my $content = file_get_contents($filepath);

    # trim first, so that split does not yield an empty leading element
    $content =~ s/^\s+|\s+$//g;

    return [ split(/\s+/, $content) ];
}
3325
# Returns the 'cpuunits' value from the guest config. If it is not defined,
# the fallback is 100 when PVE::CGroup::cgroup_mode() reports 2 and 1024
# otherwise.
my sub get_cpuunits {
    my ($conf) = @_;

    my $configured = $conf->{cpuunits};
    return $configured if defined($configured);

    return PVE::CGroup::cgroup_mode() == 2 ? 100 : 1024;
}
# Builds the QEMU command line for a guest from its configuration.
#
# Parameters:
#   $storecfg     - storage configuration, used to resolve volume IDs
#   $vmid         - ID of the guest
#   $conf         - guest configuration hash
#   $defaults     - fallback values for options not set in $conf ('tablet', 'tdf', 'scsihw')
#   $forcemachine - optional machine type, takes precedence over $conf->{machine}
#   $forcecpu     - optional string passed verbatim as '-cpu' argument
#   $pbs_backing  - optional hash, keyed by drive name, of PBS configs for which an extra
#                   '-blockdev' is generated
#
# Returns ($cmd, $vollist, $spice_port) in list context and only $cmd in scalar context.
# $cmd is the command as array reference, $vollist the list of storage volumes used by the
# guest and $spice_port the allocated SPICE port (undef when SPICE is not in use).
#
# Dies if the installed QEMU binary or qemu-server is too old for the machine type, if KVM
# is requested but unavailable, or if a configured host device does not exist.
sub config_to_command {
    my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu,
        $pbs_backing) = @_;

    my $cmd = [];
    my ($globalFlags, $machineFlags, $rtcFlags) = ([], [], []);
    my $devices = [];
    my $bridges = {};
    my $ostype = $conf->{ostype};
    my $winversion = windows_version($ostype);
    my $kvm = $conf->{kvm};
    my $nodename = nodename();

    my $arch = get_vm_arch($conf);
    my $kvm_binary = get_command_for_arch($arch);
    my $kvmver = kvm_user_version($kvm_binary);

    if (!$kvmver || $kvmver !~ m/^(\d+)\.(\d+)/ || $1 < 3) {
        $kvmver //= "undefined";
        die "Detected old QEMU binary ('$kvmver', at least 3.0 is required)\n";
    }

    my $add_pve_version = min_version($kvmver, 4, 1);

    my $machine_type = get_vm_machine($conf, $forcemachine, $arch, $add_pve_version);
    my $machine_version = extract_version($machine_type, $kvmver);
    $kvm //= 1 if is_native($arch);

    $machine_version =~ m/(\d+)\.(\d+)/;
    my ($machine_major, $machine_minor) = ($1, $2);

    if ($kvmver =~ m/^\d+\.\d+\.(\d+)/ && $1 >= 90) {
        warn "warning: Installed QEMU version ($kvmver) is a release candidate, ignoring version checks\n";
    } elsif (!min_version($kvmver, $machine_major, $machine_minor)) {
        die "Installed QEMU version '$kvmver' is too old to run machine type '$machine_type',"
            ." please upgrade node '$nodename'\n";
    } elsif (!PVE::QemuServer::Machine::can_run_pve_machine_version($machine_version, $kvmver)) {
        my $max_pve_version = PVE::QemuServer::Machine::get_pve_version($machine_version);
        die "Installed qemu-server (max feature level for $machine_major.$machine_minor is"
            ." pve$max_pve_version) is too old to run machine type '$machine_type', please upgrade"
            ." node '$nodename'\n";
    }

    # if a specific +pve version is required for a feature, use $version_guard
    # instead of min_version to allow machines to be run with the minimum
    # required version
    my $required_pve_version = 0;
    my $version_guard = sub {
        my ($major, $minor, $pve) = @_;
        return 0 if !min_version($machine_version, $major, $minor, $pve);
        my $max_pve = PVE::QemuServer::Machine::get_pve_version("$major.$minor");
        return 1 if min_version($machine_version, $major, $minor, $max_pve+1);
        $required_pve_version = $pve if $pve && $pve > $required_pve_version;
        return 1;
    };

    if ($kvm && !defined kvm_version()) {
        die "KVM virtualisation configured, but not available. Either disable in VM configuration"
            ." or enable in BIOS.\n";
    }

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
    my $use_old_bios_files = undef;
    ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);

    my $cpuunits = get_cpuunits($conf);

    push @$cmd, $kvm_binary;

    push @$cmd, '-id', $vmid;

    my $vmname = $conf->{name} || "vm$vmid";

    push @$cmd, '-name', $vmname;

    push @$cmd, '-no-shutdown';

    my $use_virtio = 0;

    my $qmpsocket = PVE::QemuServer::Helpers::qmp_socket($vmid);
    push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server=on,wait=off";
    push @$cmd, '-mon', "chardev=qmp,mode=control";

    if (min_version($machine_version, 2, 12)) {
        push @$cmd, '-chardev', "socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5";
        push @$cmd, '-mon', "chardev=qmp-event,mode=control";
    }

    push @$cmd, '-pidfile' , PVE::QemuServer::Helpers::pidfile_name($vmid);

    push @$cmd, '-daemonize';

    if ($conf->{smbios1}) {
        my $smbios_conf = parse_smbios1($conf->{smbios1});
        if ($smbios_conf->{base64}) {
            # Do not pass base64 flag to qemu
            delete $smbios_conf->{base64};
            my $smbios_string = "";
            # sorted, so that the same config always yields the same command line
            foreach my $key (sort keys %$smbios_conf) {
                my $value;
                if ($key eq "uuid") {
                    $value = $smbios_conf->{uuid}
                } else {
                    $value = decode_base64($smbios_conf->{$key});
                }
                # only skip fields without content, a plain "0" is a valid value
                next if !defined($value) || !length($value);
                # qemu accepts any binary data, only commas need escaping by double comma
                $value =~ s/,/,,/g;
                $smbios_string .= "," . $key . "=" . $value;
            }
            push @$cmd, '-smbios', "type=1" . $smbios_string;
        } else {
            push @$cmd, '-smbios', "type=1,$conf->{smbios1}";
        }
    }

    if ($conf->{bios} && $conf->{bios} eq 'ovmf') {
        my $d;
        if (my $efidisk = $conf->{efidisk0}) {
            $d = parse_drive('efidisk0', $efidisk);
        }

        my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch, $d, $q35);
        die "uefi base image '$ovmf_code' not found\n" if ! -f $ovmf_code;

        my ($path, $format);
        my $read_only_str = '';
        if ($d) {
            my ($storeid, $volname) = PVE::Storage::parse_volume_id($d->{file}, 1);
            $format = $d->{format};
            if ($storeid) {
                $path = PVE::Storage::path($storecfg, $d->{file});
                if (!defined($format)) {
                    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
                    $format = qemu_img_format($scfg, $volname);
                }
            } else {
                $path = $d->{file};
                die "efidisk format must be specified\n"
                    if !defined($format);
            }

            $read_only_str = ',readonly=on' if drive_is_read_only($conf, $d);
        } else {
            warn "no efidisk configured! Using temporary efivars disk.\n";
            $path = "/tmp/$vmid-ovmf.fd";
            PVE::Tools::file_copy($ovmf_vars, $path, -s $ovmf_vars);
            $format = 'raw';
        }

        my $size_str = "";

        if ($format eq 'raw' && $version_guard->(4, 1, 2)) {
            $size_str = ",size=" . (-s $ovmf_vars);
        }

        # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
        my $cache = "";
        if ($path =~ m/^rbd:/) {
            $cache = ',cache=writeback';
            $path .= ':rbd_cache_policy=writeback'; # avoid write-around, we *need* to cache writes too
        }

        push @$cmd, '-drive', "if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code";
        push @$cmd, '-drive', "if=pflash,unit=1$cache,format=$format,id=drive-efidisk0$size_str,file=${path}${read_only_str}";
    }

    if ($q35) { # tell QEMU to load q35 config early
        # we use different pcie-port hardware for qemu >= 4.0 for passthrough
        if (min_version($machine_version, 4, 0)) {
            push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35-4.0.cfg';
        } else {
            push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35.cfg';
        }
    }

    if ($conf->{vmgenid}) {
        push @$devices, '-device', 'vmgenid,guid='.$conf->{vmgenid};
    }

    # add usb controllers
    my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers(
        $conf, $bridges, $arch, $machine_type, $usbdesc->{format}, $MAX_USB_DEVICES);
    push @$devices, @usbcontrollers if @usbcontrollers;
    my $vga = parse_vga($conf->{vga});

    my $qxlnum = vga_conf_has_spice($conf->{vga});
    $vga->{type} = 'qxl' if $qxlnum;

    if (!$vga->{type}) {
        if ($arch eq 'aarch64') {
            $vga->{type} = 'virtio';
        } elsif (min_version($machine_version, 2, 9)) {
            $vga->{type} = (!$winversion || $winversion >= 6) ? 'std' : 'cirrus';
        } else {
            $vga->{type} = ($winversion >= 6) ? 'std' : 'cirrus';
        }
    }

    # enable absolute mouse coordinates (needed by vnc)
    my $tablet = $conf->{tablet};
    if (!defined($tablet)) {
        $tablet = $defaults->{tablet};
        $tablet = 0 if $qxlnum; # disable for spice because it is not needed
        $tablet = 0 if $vga->{type} =~ m/^serial\d+$/; # disable if we use serial terminal (no vga card)
    }

    if ($tablet) {
        push @$devices, '-device', print_tabletdevice_full($conf, $arch);
        my $kbd = print_keyboarddevice_full($conf, $arch);
        push @$devices, '-device', $kbd if defined($kbd);
    }

    my $bootorder = device_bootorder($conf);

    # host pci device passthrough
    my ($kvm_off, $gpu_passthrough, $legacy_igd) = PVE::QemuServer::PCI::print_hostpci_devices(
        $vmid, $conf, $devices, $vga, $winversion, $q35, $bridges, $arch, $machine_type, $bootorder);

    # usb devices
    my $usb_dev_features = {};
    $usb_dev_features->{spice_usb3} = 1 if min_version($machine_version, 4, 0);

    my @usbdevices = PVE::QemuServer::USB::get_usb_devices(
        $conf, $usbdesc->{format}, $MAX_USB_DEVICES, $usb_dev_features, $bootorder);
    push @$devices, @usbdevices if @usbdevices;

    # serial devices
    for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
        my $path = $conf->{"serial$i"} or next;
        if ($path eq 'socket') {
            my $socket = "/var/run/qemu-server/${vmid}.serial$i";
            push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server=on,wait=off";
            # On aarch64, serial0 is the UART device. Qemu only allows
            # connecting UART devices via the '-serial' command line, as
            # the device has a fixed slot on the hardware...
            if ($arch eq 'aarch64' && $i == 0) {
                push @$devices, '-serial', "chardev:serial$i";
            } else {
                push @$devices, '-device', "isa-serial,chardev=serial$i";
            }
        } else {
            die "no such serial device\n" if ! -c $path;
            push @$devices, '-chardev', "tty,id=serial$i,path=$path";
            push @$devices, '-device', "isa-serial,chardev=serial$i";
        }
    }

    # parallel devices
    for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
        if (my $path = $conf->{"parallel$i"}) {
            die "no such parallel device\n" if ! -c $path;
            my $devtype = $path =~ m!^/dev/usb/lp! ? 'tty' : 'parport';
            push @$devices, '-chardev', "$devtype,id=parallel$i,path=$path";
            push @$devices, '-device', "isa-parallel,chardev=parallel$i";
        }
    }

    if (min_version($machine_version, 4, 0) && (my $audio = conf_has_audio($conf))) {
        my $audiopciaddr = print_pci_addr("audio0", $bridges, $arch, $machine_type);
        my $audio_devs = audio_devs($audio, $audiopciaddr, $machine_version);
        push @$devices, @$audio_devs;
    }

    add_tpm_device($vmid, $devices, $conf);

    my $sockets = 1;
    $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer used
    $sockets = $conf->{sockets} if $conf->{sockets};

    my $cores = $conf->{cores} || 1;

    my $maxcpus = $sockets * $cores;

    my $vcpus = $conf->{vcpus} ? $conf->{vcpus} : $maxcpus;

    my $allowed_vcpus = $cpuinfo->{cpus};

    die "MAX $allowed_vcpus vcpus allowed per VM on this node\n" if ($allowed_vcpus < $maxcpus);

    if ($hotplug_features->{cpu} && min_version($machine_version, 2, 7)) {
        # start with a single CPU, the remaining ones are added as separate (unpluggable) devices
        push @$cmd, '-smp', "1,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
        for (my $i = 2; $i <= $vcpus; $i++) {
            my $cpustr = print_cpu_device($conf,$i);
            push @$cmd, '-device', $cpustr;
        }

    } else {

        push @$cmd, '-smp', "$vcpus,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
    }
    push @$cmd, '-nodefaults';

    push @$cmd, '-boot', "menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg";

    push @$cmd, '-no-acpi' if defined($conf->{acpi}) && $conf->{acpi} == 0;

    push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;

    if ($vga->{type} && $vga->{type} !~ m/^serial\d+$/ && $vga->{type} ne 'none'){
        push @$devices, '-device', print_vga_device(
            $conf, $vga, $arch, $machine_version, $machine_type, undef, $qxlnum, $bridges);
        my $socket = PVE::QemuServer::Helpers::vnc_socket($vmid);
        push @$cmd, '-vnc', "unix:$socket,password=on";
    } else {
        push @$cmd, '-vga', 'none' if $vga->{type} eq 'none';
        push @$cmd, '-nographic';
    }

    # time drift fix
    my $tdf = defined($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};
    my $useLocaltime = $conf->{localtime};

    if ($winversion >= 5) { # windows
        $useLocaltime = 1 if !defined($conf->{localtime});

        # use time drift fix when acpi is enabled
        if (!(defined($conf->{acpi}) && $conf->{acpi} == 0)) {
            $tdf = 1 if !defined($conf->{tdf});
        }
    }

    if ($winversion >= 6) {
        push @$globalFlags, 'kvm-pit.lost_tick_policy=discard';
        push @$cmd, '-no-hpet';
    }

    push @$rtcFlags, 'driftfix=slew' if $tdf;

    if ($conf->{startdate} && $conf->{startdate} ne 'now') {
        push @$rtcFlags, "base=$conf->{startdate}";
    } elsif ($useLocaltime) {
        push @$rtcFlags, 'base=localtime';
    }

    if ($forcecpu) {
        push @$cmd, '-cpu', $forcecpu;
    } else {
        push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
    }

    PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd);

    push @$cmd, '-S' if $conf->{freeze};

    push @$cmd, '-k', $conf->{keyboard} if defined($conf->{keyboard});

    my $guest_agent = parse_guest_agent($conf);

    if ($guest_agent->{enabled}) {
        my $qgasocket = PVE::QemuServer::Helpers::qmp_socket($vmid, 1);
        push @$devices, '-chardev', "socket,path=$qgasocket,server=on,wait=off,id=qga0";

        if (!$guest_agent->{type} || $guest_agent->{type} eq 'virtio') {
            my $pciaddr = print_pci_addr("qga0", $bridges, $arch, $machine_type);
            push @$devices, '-device', "virtio-serial,id=qga0$pciaddr";
            push @$devices, '-device', 'virtserialport,chardev=qga0,name=org.qemu.guest_agent.0';
        } elsif ($guest_agent->{type} eq 'isa') {
            push @$devices, '-device', "isa-serial,chardev=qga0";
        }
    }

    my $rng = $conf->{rng0} ? parse_rng($conf->{rng0}) : undef;
    if ($rng && $version_guard->(4, 1, 2)) {
        check_rng_source($rng->{source});

        my $max_bytes = $rng->{max_bytes} // $rng_fmt->{max_bytes}->{default};
        my $period = $rng->{period} // $rng_fmt->{period}->{default};
        my $limiter_str = "";
        if ($max_bytes) {
            $limiter_str = ",max-bytes=$max_bytes,period=$period";
        }

        my $rng_addr = print_pci_addr("rng0", $bridges, $arch, $machine_type);
        push @$devices, '-object', "rng-random,filename=$rng->{source},id=rng0";
        push @$devices, '-device', "virtio-rng-pci,rng=rng0$limiter_str$rng_addr";
    }

    my $spice_port;

    if ($qxlnum) {
        if ($qxlnum > 1) {
            if ($winversion){
                for (my $i = 1; $i < $qxlnum; $i++){
                    push @$devices, '-device', print_vga_device(
                        $conf, $vga, $arch, $machine_version, $machine_type, $i, $qxlnum, $bridges);
                }
            } else {
                # assume other OS works like Linux
                my ($ram, $vram) = ("134217728", "67108864");
                if ($vga->{memory}) {
                    $ram = PVE::Tools::convert_size($qxlnum*4*$vga->{memory}, 'mb' => 'b');
                    $vram = PVE::Tools::convert_size($qxlnum*2*$vga->{memory}, 'mb' => 'b');
                }
                push @$cmd, '-global', "qxl-vga.ram_size=$ram";
                push @$cmd, '-global', "qxl-vga.vram_size=$vram";
            }
        }

        my $pciaddr = print_pci_addr("spice", $bridges, $arch, $machine_type);

        my $pfamily = PVE::Tools::get_host_address_family($nodename);
        my @nodeaddrs = PVE::Tools::getaddrinfo_all('localhost', family => $pfamily);
        die "failed to get an ip address of type $pfamily for 'localhost'\n" if !@nodeaddrs;

        push @$devices, '-device', "virtio-serial,id=spice$pciaddr";
        push @$devices, '-chardev', "spicevmc,id=vdagent,name=vdagent";
        push @$devices, '-device', "virtserialport,chardev=vdagent,name=com.redhat.spice.0";

        my $localhost = PVE::Network::addr_to_ip($nodeaddrs[0]->{addr});
        $spice_port = PVE::Tools::next_spice_port($pfamily, $localhost);

        my $spice_enhancement_str = $conf->{spice_enhancements} // '';
        my $spice_enhancement = parse_property_string($spice_enhancements_fmt, $spice_enhancement_str);
        if ($spice_enhancement->{foldersharing}) {
            push @$devices, '-chardev', "spiceport,id=foldershare,name=org.spice-space.webdav.0";
            push @$devices, '-device', "virtserialport,chardev=foldershare,name=org.spice-space.webdav.0";
        }

        my $spice_opts = "tls-port=${spice_port},addr=$localhost,tls-ciphers=HIGH,seamless-migration=on";
        $spice_opts .= ",streaming-video=$spice_enhancement->{videostreaming}"
            if $spice_enhancement->{videostreaming};

        push @$devices, '-spice', $spice_opts;
    }

    # enable balloon by default, unless explicitly disabled
    if (!defined($conf->{balloon}) || $conf->{balloon}) {
        my $pciaddr = print_pci_addr("balloon0", $bridges, $arch, $machine_type);
        push @$devices, '-device', "virtio-balloon-pci,id=balloon0$pciaddr";
    }

    if ($conf->{watchdog}) {
        my $wdopts = parse_watchdog($conf->{watchdog});
        my $pciaddr = print_pci_addr("watchdog", $bridges, $arch, $machine_type);
        my $watchdog = $wdopts->{model} || 'i6300esb';
        push @$devices, '-device', "$watchdog$pciaddr";
        push @$devices, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
    }

    my $vollist = [];
    my $scsicontroller = {};
    my $ahcicontroller = {};
    my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : $defaults->{scsihw};

    # Add iscsi initiator name if available
    if (my $initiator = get_initiator_name()) {
        push @$devices, '-iscsi', "initiator-name=$initiator";
    }

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        if (PVE::Storage::parse_volume_id($drive->{file}, 1)) {
            check_volume_storage_type($storecfg, $drive->{file});
            push @$vollist, $drive->{file};
        }

        # ignore efidisk here, already added in bios/fw handling code above
        return if $drive->{interface} eq 'efidisk';
        # similar for TPM
        return if $drive->{interface} eq 'tpmstate';

        $use_virtio = 1 if $ds =~ m/^virtio/;

        $drive->{bootindex} = $bootorder->{$ds} if $bootorder->{$ds};

        if ($drive->{interface} eq 'virtio'){
            push @$cmd, '-object', "iothread,id=iothread-$ds" if $drive->{iothread};
        }

        if ($drive->{interface} eq 'scsi') {

            my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);

            die "scsi$drive->{index}: machine version 4.1~pve2 or higher is required to use more than 14 SCSI disks\n"
                if $drive->{index} > 13 && !$version_guard->(4, 1, 2);

            my $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges, $arch, $machine_type);
            my $scsihw_type = $scsihw =~ m/^virtio-scsi-single/ ? "virtio-scsi-pci" : $scsihw;

            my $iothread = '';
            if ($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{iothread}){
                $iothread .= ",iothread=iothread-$controller_prefix$controller";
                push @$cmd, '-object', "iothread,id=iothread-$controller_prefix$controller";
            } elsif ($drive->{iothread}) {
                warn "iothread is only valid with virtio disk or virtio-scsi-single controller, ignoring\n";
            }

            my $queues = '';
            if ($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{queues}){
                $queues = ",num_queues=$drive->{queues}";
            }

            # several disks can share one controller, only add it once
            push @$devices, '-device', "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues"
                if !$scsicontroller->{$controller};
            $scsicontroller->{$controller}=1;
        }

        if ($drive->{interface} eq 'sata') {
            my $controller = int($drive->{index} / $PVE::QemuServer::Drive::MAX_SATA_DISKS);
            my $pciaddr = print_pci_addr("ahci$controller", $bridges, $arch, $machine_type);
            push @$devices, '-device', "ahci,id=ahci$controller,multifunction=on$pciaddr"
                if !$ahcicontroller->{$controller};
            $ahcicontroller->{$controller}=1;
        }

        my $pbs_conf = $pbs_backing->{$ds};
        my $pbs_name = undef;
        if ($pbs_conf) {
            $pbs_name = "drive-$ds-pbs";
            push @$devices, '-blockdev', print_pbs_blockdev($pbs_conf, $pbs_name);
        }

        my $drive_cmd = print_drive_commandline_full(
            $storecfg, $vmid, $drive, $pbs_name, min_version($kvmver, 6, 0));

        # extra protection for templates, but SATA and IDE don't support it..
        $drive_cmd .= ',readonly=on' if drive_is_read_only($conf, $drive);

        push @$devices, '-drive', $drive_cmd;
        push @$devices, '-device', print_drivedevice_full(
            $storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type);
    });

    for (my $i = 0; $i < $MAX_NETS; $i++) {
        my $netname = "net$i";

        next if !$conf->{$netname};
        my $d = parse_net($conf->{$netname});
        next if !$d;

        $use_virtio = 1 if $d->{model} eq 'virtio';

        $d->{bootindex} = $bootorder->{$netname} if $bootorder->{$netname};

        my $netdevfull = print_netdev_full($vmid, $conf, $arch, $d, $netname);
        push @$devices, '-netdev', $netdevfull;

        my $netdevicefull = print_netdevice_full(
            $vmid, $conf, $d, $netname, $bridges, $use_old_bios_files, $arch, $machine_type);

        push @$devices, '-device', $netdevicefull;
    }

    if ($conf->{ivshmem}) {
        my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});

        my $bus;
        if ($q35) {
            $bus = print_pcie_addr("ivshmem");
        } else {
            $bus = print_pci_addr("ivshmem", $bridges, $arch, $machine_type);
        }

        my $ivshmem_name = $ivshmem->{name} // $vmid;
        my $path = '/dev/shm/pve-shm-' . $ivshmem_name;

        push @$devices, '-device', "ivshmem-plain,memdev=ivshmem$bus,";
        push @$devices, '-object', "memory-backend-file,id=ivshmem,share=on,mem-path=$path"
            .",size=$ivshmem->{size}M";
    }

    # pci.4 is nested in pci.1
    $bridges->{1} = 1 if $bridges->{4};

    if (!$q35) { # add pci bridges
        if (min_version($machine_version, 2, 3)) {
            $bridges->{1} = 1;
            $bridges->{2} = 1;
        }
        $bridges->{3} = 1 if $scsihw =~ m/^virtio-scsi-single/;
    }

    for my $k (sort {$b cmp $a} keys %$bridges) {
        next if $q35 && $k < 4; # q35.cfg already includes bridges up to 3

        my $k_name = $k;
        if ($k == 2 && $legacy_igd) {
            $k_name = "$k-igd";
        }
        my $pciaddr = print_pci_addr("pci.$k_name", undef, $arch, $machine_type);
        my $devstr = "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr";

        if ($q35) { # add after -readconfig pve-q35.cfg
            splice @$devices, 2, 0, '-device', $devstr;
        } else {
            unshift @$devices, '-device', $devstr if $k > 0;
        }
    }

    if (!$kvm) {
        push @$machineFlags, 'accel=tcg';
    }

    # start with the lowest +pve version that provides all features actually in use
    my $machine_type_min = $machine_type;
    if ($add_pve_version) {
        $machine_type_min =~ s/\+pve\d+$//;
        $machine_type_min .= "+pve$required_pve_version";
    }
    push @$machineFlags, "type=${machine_type_min}";

    push @$cmd, @$devices;
    push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
    push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
    push @$cmd, '-global', join(',', @$globalFlags) if scalar(@$globalFlags);

    if (my $vmstate = $conf->{vmstate}) {
        my $statepath = PVE::Storage::path($storecfg, $vmstate);
        push @$vollist, $vmstate;
        push @$cmd, '-loadstate', $statepath;
        print "activating and using '$vmstate' as vmstate\n";
    }

    if (PVE::QemuConfig->is_template($conf)) {
        # needed to workaround base volumes being read-only
        push @$cmd, '-snapshot';
    }

    # add custom args
    if ($conf->{args}) {
        my $aa = PVE::Tools::split_args($conf->{args});
        push @$cmd, @$aa;
    }

    return wantarray ? ($cmd, $vollist, $spice_port) : $cmd;
}
3957
# Verifies that $source can be used as backing file for a VirtIO RNG device.
#
# Dies if $source does not exist, or if it is '/dev/hwrng' while the kernel's
# 'rng_current' attribute reads 'none'.
sub check_rng_source {
    my ($source) = @_;

    # mostly relevant for /dev/hwrng, but doesn't hurt to check others too
    if (!-e $source) {
        die "cannot create VirtIO RNG device: source file '$source' doesn't exist\n";
    }

    my $rng_current = '/sys/devices/virtual/misc/hw_random/rng_current';
    my $is_hwrng = $source eq '/dev/hwrng';

    if ($is_hwrng && file_read_firstline($rng_current) eq 'none') {
        # Needs to abort, otherwise QEMU crashes on first rng access. Note that rng_current cannot
        # be changed to 'none' manually, so once the VM is past this point, it's no longer an issue.
        my $msg = "Cannot start VM with passed-through RNG device: '/dev/hwrng' exists, but";
        $msg .= " '$rng_current' is set to 'none'. Ensure that a compatible hardware-RNG is attached";
        $msg .= " to the host.\n";
        die $msg;
    }
}
3974
# Asks the running guest (QMP 'query-spice') for its SPICE port.
#
# Returns the 'tls-port' if set, else the plain 'port'. Dies if neither is set.
sub spice_port {
    my ($vmid) = @_;

    my $spice_info = mon_cmd($vmid, 'query-spice');

    # prefer the TLS port over the unencrypted one
    for my $key ('tls-port', 'port') {
        my $port = $spice_info->{$key};
        return $port if $port;
    }

    die "no spice port\n";
}
3982
# Collects the IDs of devices currently present in a running guest.
#
# The result is assembled from four QMP queries: 'query-pci' (walked through
# all nested PCI bridges), 'query-block' (drives named 'drive-<id>'),
# 'query-mice' (the tablet) and 'qom-list' on '/machine/peripheral' (usbN).
#
# Returns a hash reference keyed by device ID. Values are true; for PCI bridges
# the number of devices behind the bridge is added to the value.
sub vm_devices_list {
    my ($vmid) = @_;

    my $devices = {};

    my $pci_buses = mon_cmd($vmid, 'query-pci');
    my @pending;
    foreach my $pcibus (@$pci_buses) {
        push @pending, @{$pcibus->{devices}};
    }

    # walk the PCI tree level by level, devices behind bridges are queued for the next round
    while (@pending) {
        my @next_level;
        for my $dev (@pending) {
            my $qdev_id = $dev->{'qdev_id'};
            $devices->{$qdev_id} = 1 if $qdev_id;

            my $bridge = $dev->{'pci_bridge'};
            next if !$bridge;

            $devices->{$qdev_id} += scalar(@{$bridge->{devices}});
            push @next_level, @{$bridge->{devices}};
        }
        @pending = @next_level;
    }

    my $block_devs = mon_cmd($vmid, 'query-block');
    foreach my $block (@$block_devs) {
        $devices->{$1} = 1 if $block->{device} =~ m/^drive-(\S+)/;
    }

    my $mice = mon_cmd($vmid, 'query-mice');
    foreach my $mouse (@$mice) {
        next if $mouse->{name} ne 'QEMU HID Tablet';
        $devices->{tablet} = 1;
        last;
    }

    # for usb devices there is no query-usb
    # but we can iterate over the entries in
    # qom-list path=/machine/peripheral
    my $peripherals = mon_cmd($vmid, 'qom-list', path => '/machine/peripheral');
    foreach my $per (@$peripherals) {
        my $name = $per->{name};
        $devices->{$name} = 1 if $name =~ m/^usb\d+$/;
    }

    return $devices;
}
4032
# Hot-plug the device identified by $deviceid into the running VM $vmid.
#
# $device is the parsed configuration of the device (it is read by the drive,
# SCSI controller and network branches); the tablet, keyboard and PCI bridge
# branches do not read it.
#
# Returns 1 when the device is already reported by vm_devices_list() or was
# plugged. Returns an empty value when qemu_netdevadd() reports failure for a
# network device. Dies for device ids that cannot be hot-plugged (including
# all usb<N> ids) and when one of the plug steps fails.
sub vm_deviceplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # nothing to do if the device is already present
    my $devices_list = vm_devices_list($vmid);
    return 1 if defined($devices_list->{$deviceid});

    # add PCI bridge if we need it for the device
    qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid, $arch, $machine_type);

    if ($deviceid eq 'tablet') {
        qemu_deviceadd($vmid, print_tabletdevice_full($conf, $arch));
    } elsif ($deviceid eq 'keyboard') {
        qemu_deviceadd($vmid, print_keyboarddevice_full($conf, $arch));
    } elsif ($deviceid =~ m/^usb(\d+)$/) {
        die "usb hotplug currently not reliable\n";
        # since we can't reliably hot unplug all added usb devices and usb
        # passthrough breaks live migration we disable usb hotplugging for now
        #qemu_deviceadd($vmid, PVE::QemuServer::USB::print_usbdevice_full($conf, $deviceid, $device));
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        # virtio block device: optional iothread object, then drive, then device
        qemu_iothread_add($vmid, $deviceid, $device);

        qemu_driveadd($storecfg, $vmid, $device);
        my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);

        qemu_deviceadd($vmid, $devicefull);
        eval { qemu_deviceaddverify($vmid, $deviceid); };
        if (my $err = $@) {
            # roll back the drive if the device never showed up
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        # SCSI controller; the model comes from the 'scsihw' option ("lsi" if unset)
        my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : "lsi";
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $scsihw_type = $scsihw eq 'virtio-scsi-single' ? "virtio-scsi-pci" : $scsihw;

        my $devicefull = "$scsihw_type,id=$deviceid$pciaddr";

        if($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{iothread}) {
            qemu_iothread_add($vmid, $deviceid, $device);
            $devicefull .= ",iothread=iothread-$deviceid";
        }

        if($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{queues}) {
            $devicefull .= ",num_queues=$device->{queues}";
        }

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        # SCSI disk: make sure its controller exists, then add drive and device
        qemu_findorcreatescsihw($storecfg,$conf, $vmid, $device, $arch, $machine_type);
        qemu_driveadd($storecfg, $vmid, $device);

        my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);
        eval { qemu_deviceadd($vmid, $devicefull); };
        if (my $err = $@) {
            # roll back the drive if adding the device failed
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        return if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid);

        # note: this shadows the $machine_type parameter for the rest of the branch
        my $machine_type = PVE::QemuServer::Machine::qemu_machine_pxe($vmid, $conf);
        my $use_old_bios_files = undef;
        ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);

        my $netdevicefull = print_netdevice_full(
            $vmid, $conf, $device, $deviceid, undef, $use_old_bios_files, $arch, $machine_type);
        qemu_deviceadd($vmid, $netdevicefull);
        eval {
            qemu_deviceaddverify($vmid, $deviceid);
            qemu_set_link_status($vmid, $deviceid, !$device->{link_down});
        };
        if (my $err = $@) {
            # roll back the netdev backend if the NIC could not be verified/configured
            eval { qemu_netdevdel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif (!$q35 && $deviceid =~ m/^(pci\.)(\d+)$/) {
        # PCI bridge "pci.<N>"; only handled for non-q35 machines
        my $bridgeid = $2;
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $devicefull = "pci-bridge,id=pci.$bridgeid,chassis_nr=$bridgeid$pciaddr";

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);
    } else {
        die "can't hotplug device '$deviceid'\n";
    }

    return 1;
}
4127
# Hot-unplug the device identified by $deviceid from the running VM $vmid.
#
# Returns 1 when the device is not reported by vm_devices_list() (nothing to
# do) or after the unplug commands were issued. Dies when $deviceid is one of
# the boot disks, for usb<N> ids, for ids that cannot be unplugged, and when
# one of the unplug steps fails.
#
# fixme: this should raise exceptions on error!
sub vm_deviceunplug {
    my ($vmid, $conf, $deviceid) = @_;

    my $devices_list = vm_devices_list($vmid);
    return 1 if !defined($devices_list->{$deviceid});

    my $bootdisks = PVE::QemuServer::Drive::get_bootdisks($conf);
    die "can't unplug bootdisk '$deviceid'\n" if grep {$_ eq $deviceid} @$bootdisks;

    if ($deviceid eq 'tablet' || $deviceid eq 'keyboard') {
        qemu_devicedel($vmid, $deviceid);
    } elsif ($deviceid =~ m/^usb\d+$/) {
        die "usb hotplug currently not reliable\n";
        # when unplugging usb devices this way, there may be remaining usb
        # controllers/hubs so we disable it for now
        #qemu_devicedel($vmid, $deviceid);
        #qemu_devicedelverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        my $device = parse_drive($deviceid, $conf->{$deviceid});

        # remove the device, wait until it is gone, then drop drive and iothread
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_iothread_del($vmid, $deviceid, $device);
    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        my $device = parse_drive($deviceid, $conf->{$deviceid});

        # NOTE(review): unlike the virtio branch, removal is not verified with
        # qemu_devicedelverify() before the drive is deleted - confirm intended
        qemu_devicedel($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_deletescsihw($conf, $vmid, $deviceid);

        # with virtio-scsi-single the iothread is named after the per-disk controller
        qemu_iothread_del($vmid, "virtioscsi$device->{index}", $device)
            if $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_netdevdel($vmid, $deviceid);
    } else {
        die "can't unplug device '$deviceid'\n";
    }

    return 1;
}
4175
# Issue a 'device_add' monitor command for VM $vmid.
#
# $devicefull is a "<driver>,key=value,..." string; it is prefixed with
# "driver=" and split on '=' and ',' into the command's parameters.
# Returns whatever mon_cmd() returns.
sub qemu_deviceadd {
    my ($vmid, $devicefull) = @_;

    my %device_opts = split(/[=,]/, "driver=" . $devicefull);

    return mon_cmd($vmid, "device_add", %device_opts);
}
4184
# Issue a 'device_del' monitor command for device $deviceid of VM $vmid and
# return the (scalar) result of mon_cmd().
sub qemu_devicedel {
    my ($vmid, $deviceid) = @_;

    return scalar(mon_cmd($vmid, "device_del", id => $deviceid));
}
4190
# Create the iothread object "iothread-$deviceid" in VM $vmid if $device has
# the 'iothread' option set and no such object is listed yet.
sub qemu_iothread_add {
    my ($vmid, $deviceid, $device) = @_;

    if ($device->{iothread}) {
        my $object_id = "iothread-$deviceid";
        my $existing = vm_iothreads_list($vmid);
        qemu_objectadd($vmid, $object_id, "iothread") if !$existing->{$object_id};
    }
}
4199
# Remove the iothread object "iothread-$deviceid" from VM $vmid if $device has
# the 'iothread' option set and such an object is currently listed.
sub qemu_iothread_del {
    my ($vmid, $deviceid, $device) = @_;

    if ($device->{iothread}) {
        my $object_id = "iothread-$deviceid";
        my $existing = vm_iothreads_list($vmid);
        qemu_objectdel($vmid, $object_id) if $existing->{$object_id};
    }
}
4208
# Issue an 'object-add' monitor command creating object $objectid of QOM type
# $qomtype in VM $vmid. Returns 1.
sub qemu_objectadd {
    my ($vmid, $objectid, $qomtype) = @_;

    my @object_args = (id => $objectid, "qom-type" => $qomtype);
    mon_cmd($vmid, "object-add", @object_args);

    return 1;
}
4216
# Issue an 'object-del' monitor command removing object $objectid from VM
# $vmid. Returns 1.
sub qemu_objectdel {
    my ($vmid, $objectid) = @_;

    my @object_args = (id => $objectid);
    mon_cmd($vmid, "object-del", @object_args);

    return 1;
}
4224
# Add the drive described by $device to the running VM $vmid using the HMP
# 'drive_add' command.
#
# The io_uring flag handed to print_drive_commandline_full() is the result of
# checking the running QEMU version against 6.0. Backslashes in the generated
# drive string are doubled before it is embedded in the quoted HMP argument.
#
# Returns 1 when the monitor reply contains "OK", dies otherwise.
sub qemu_driveadd {
    my ($storecfg, $vmid, $device) = @_;

    my $running_version = get_running_qemu_version($vmid);
    my $use_io_uring = min_version($running_version, 6, 0);

    my $drive_string = print_drive_commandline_full($storecfg, $vmid, $device, undef, $use_io_uring);
    $drive_string =~ s/\\/\\\\/g;

    my $reply = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_add auto \"$drive_string\"");

    # If the command succeeds qemu prints: "OK"
    die "adding drive failed: $reply\n" if $reply !~ m/OK/s;

    return 1;
}
4239
# Delete the block backend "drive-$deviceid" of VM $vmid using the HMP
# 'drive_del' command.
#
# Returns 1 when the reply is empty (ignoring leading whitespace) or reports
# that the device was not found; dies with the reply text otherwise.
sub qemu_drivedel {
    my ($vmid, $deviceid) = @_;

    my $reply = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_del drive-$deviceid");
    $reply =~ s/^\s+//;

    # NB: device not found errors mean the drive was auto-deleted and we ignore the error
    if ($reply eq "" || $reply =~ m/Device \'.*?\' not found/s) {
        return 1;
    }

    die "deleting drive $deviceid failed : $reply\n";
}
4253
# Wait until device $deviceid shows up in vm_devices_list() for VM $vmid.
#
# Checks up to six times, sleeping one second after each unsuccessful check.
# Returns 1 as soon as the device is listed, dies if it never appears.
sub qemu_deviceaddverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if defined($present->{$deviceid});
        sleep 1;
    }

    die "error on hotplug device '$deviceid'\n";
}
4265
4266
# Wait until device $deviceid no longer shows up in vm_devices_list() for VM
# $vmid.
#
# Checks up to six times, sleeping one second after each unsuccessful check.
# Returns 1 as soon as the device is gone, dies if it is still listed.
sub qemu_devicedelverify {
    my ($vmid, $deviceid) = @_;

    # device_del is asynchronous and its empty return value is not a reliable
    # success indicator, so poll the device list instead
    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if !defined($present->{$deviceid});
        sleep 1;
    }

    die "error on hot-unplugging device '$deviceid'\n";
}
4281
# Make sure the SCSI controller that drive $device belongs to exists in VM
# $vmid, hot-plugging it when it is not in the device list yet.
#
# The controller id is "<prefix><controller>" as reported by scsihw_infos().
# Returns 1.
sub qemu_findorcreatescsihw {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my (undef, $controller, $controller_prefix) = scsihw_infos($conf, $device);
    my $controller_id = $controller_prefix . $controller;

    my $present = vm_devices_list($vmid);
    vm_deviceplug($storecfg, $conf, $vmid, $controller_id, $device, $arch, $machine_type)
        if !defined($present->{$controller_id});

    return 1;
}
4296
# Remove the SCSI controller used by drive option $opt from VM $vmid when it
# is no longer needed. Always returns 1.
#
# With scsihw 'virtio-scsi-single' the controller "virtioscsi<index>" of the
# drive is unplugged directly. Otherwise controller "scsihw<controller>" is
# unplugged unless one of the devices currently listed for the VM is a SCSI
# drive whose index is below the limit computed in the loop.
sub qemu_deletescsihw {
    my ($conf, $vmid, $opt) = @_;

    my $device = parse_drive($opt, $conf->{$opt});

    if ($conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single')) {
        vm_deviceunplug($vmid, $conf, "virtioscsi$device->{index}");
        return 1;
    }

    my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $device);

    # keep the controller if a listed SCSI drive still needs it
    # NOTE(review): the limit ($maxdev-1)*($controller+1) does not obviously
    # match "drives that belong to this controller" - confirm intended range
    my $devices_list = vm_devices_list($vmid);
    foreach my $opt (keys %{$devices_list}) {
        if (is_valid_drivename($opt)) {
            my $drive = parse_drive($opt, $conf->{$opt});
            if ($drive->{interface} eq 'scsi' && $drive->{index} < (($maxdev-1)*($controller+1))) {
                return 1;
            }
        }
    }

    my $scsihwid="scsihw$controller";

    vm_deviceunplug($vmid, $conf, $scsihwid);

    return 1;
}
4325
# Hot-plug the PCI bridge that device id $device sits behind, if it needs one
# and the bridge is not present in VM $vmid yet.
#
# print_pci_addr() records the bridge id required by $device in a scratch
# hash. Nothing is done when no bridge id was recorded or the id is below 1.
# Returns 1.
sub qemu_add_pci_bridge {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my $bridges = {};

    my $bridgeid;

    # called only for its side effect of filling $bridges
    print_pci_addr($device, $bridges, $arch, $machine_type);

    for my $id (keys %$bridges) {
        $bridgeid = $id;
    }
    return 1 if !defined($bridgeid) || $bridgeid < 1;

    my $bridge = "pci.$bridgeid";
    my $devices_list = vm_devices_list($vmid);

    if (!defined($devices_list->{$bridge})) {
        # vm_deviceplug() takes ($storecfg, $conf, $vmid, $deviceid, $device,
        # $arch, $machine_type). A bridge has no device config, so pass an
        # empty one explicitly; leaving the slot out would shift $arch and
        # $machine_type into the wrong parameters.
        vm_deviceplug($storecfg, $conf, $vmid, $bridge, {}, $arch, $machine_type);
    }

    return 1;
}
4349
# Set the link state of network device $device in VM $vmid through the
# 'set_link' monitor command; a true $up brings the link up, a false one
# takes it down.
sub qemu_set_link_status {
    my ($vmid, $device, $up) = @_;

    my $link_state = $up ? JSON::true : JSON::false;

    return mon_cmd($vmid, "set_link", name => $device, up => $link_state);
}
4356
# Add the network backend for $deviceid to VM $vmid through the 'netdev_add'
# monitor command.
#
# The option string produced by print_netdev_full() is split on '=' and ','
# into command parameters; 'vhost' is converted to a JSON boolean and 'queues'
# to a number before sending. Returns 1.
sub qemu_netdevadd {
    my ($vmid, $conf, $arch, $device, $deviceid) = @_;

    my $netdev_string = print_netdev_full($vmid, $conf, $arch, $device, $deviceid, 1);
    my %netdev_opts = split(/[=,]/, $netdev_string);

    # the monitor expects typed values, not the strings from the option list
    $netdev_opts{vhost} = JSON::boolean(PVE::JSONSchema::parse_boolean($netdev_opts{vhost}))
        if defined($netdev_opts{vhost});

    $netdev_opts{queues} += 0 if defined($netdev_opts{queues});

    mon_cmd($vmid, "netdev_add", %netdev_opts);

    return 1;
}
4374
# Remove the network backend $deviceid from VM $vmid through the 'netdev_del'
# monitor command. Returns whatever mon_cmd() returns.
sub qemu_netdevdel {
    my ($vmid, $deviceid) = @_;

    return mon_cmd($vmid, "netdev_del", id => $deviceid);
}
4380
# Replace the USB device $deviceid of VM $vmid with the one described by
# $device (hash with at least 'host', optionally 'usb3').
#
# Does nothing when $device is false. The old device is unplugged first; for
# USB3 devices an xhci controller is added when the VM has none; finally the
# parsed host device is plugged via vm_deviceplug().
sub qemu_usb_hotplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    return if !$device;

    # remove the old one first
    vm_deviceunplug($vmid, $conf, $deviceid);

    # check if xhci controller is necessary and available
    if ($device->{usb3}) {
        my $present = vm_devices_list($vmid);

        qemu_deviceadd(
            $vmid,
            "nec-usb-xhci,id=xhci" . print_pci_addr("xhci", undef, $arch, $machine_type),
        ) if !$present->{xhci};
    }

    my $usb_device = parse_usb_device($device->{host});
    $usb_device->{usb3} = $device->{usb3};

    # add the new one
    vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $usb_device, $arch, $machine_type);
}
4405
# Change the number of vCPUs of the running VM $vmid to $vcpus.
#
# The maximum is sockets * cores from $conf; a false $vcpus means "maximum".
# Dies when $vcpus exceeds the maximum, when unplugging is requested on a
# machine version below 2.7, or when the number of running vCPUs does not
# match the configuration before plugging. Raises a parameter exception for
# 'vcpus' when a single plug/unplug does not take effect within the retry
# limit. After every successful plug/unplug on 2.7+ machines $conf->{vcpus}
# is updated and the config is written.
sub qemu_cpu_hotplug {
    my ($vmid, $conf, $vcpus) = @_;

    my $machine_type = PVE::QemuServer::Machine::get_current_qemu_machine($vmid);

    my $sockets = 1;
    $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer used
    $sockets = $conf->{sockets} if $conf->{sockets};
    my $cores = $conf->{cores} || 1;
    my $maxcpus = $sockets * $cores;

    $vcpus = $maxcpus if !$vcpus;

    die "you can't add more vcpus than maxcpus\n"
        if $vcpus > $maxcpus;

    my $currentvcpus = $conf->{vcpus} || $maxcpus;

    if ($vcpus < $currentvcpus) {
        # hot-unplug: remove cpu devices one by one, highest index first

        if (PVE::QemuServer::Machine::machine_version($machine_type, 2, 7)) {

            for (my $i = $currentvcpus; $i > $vcpus; $i--) {
                qemu_devicedel($vmid, "cpu$i");
                my $retry = 0;
                my $currentrunningvcpus = undef;
                # poll until the guest reports one vCPU less
                while (1) {
                    $currentrunningvcpus = mon_cmd($vmid, "query-cpus-fast");
                    last if scalar(@{$currentrunningvcpus}) == $i-1;
                    raise_param_exc({ vcpus => "error unplugging cpu$i" }) if $retry > 5;
                    $retry++;
                    sleep 1;
                }
                #update conf after each successful cpu unplug
                $conf->{vcpus} = scalar(@{$currentrunningvcpus});
                PVE::QemuConfig->write_config($vmid, $conf);
            }
        } else {
            die "cpu hot-unplugging requires qemu version 2.7 or higher\n";
        }

        return;
    }

    my $currentrunningvcpus = mon_cmd($vmid, "query-cpus-fast");
    die "vcpus in running vm does not match its configuration\n"
        if scalar(@{$currentrunningvcpus}) != $currentvcpus;

    if (PVE::QemuServer::Machine::machine_version($machine_type, 2, 7)) {
        # hotplug: add cpu devices one by one

        for (my $i = $currentvcpus+1; $i <= $vcpus; $i++) {
            my $cpustr = print_cpu_device($conf, $i);
            qemu_deviceadd($vmid, $cpustr);

            my $retry = 0;
            my $currentrunningvcpus = undef;
            # poll until the guest reports the new vCPU
            while (1) {
                $currentrunningvcpus = mon_cmd($vmid, "query-cpus-fast");
                last if scalar(@{$currentrunningvcpus}) == $i;
                raise_param_exc({ vcpus => "error hotplugging cpu$i" }) if $retry > 10;
                sleep 1;
                $retry++;
            }
            #update conf after each successful cpu hotplug
            $conf->{vcpus} = scalar(@{$currentrunningvcpus});
            PVE::QemuConfig->write_config($vmid, $conf);
        }
    } else {
        # machine versions below 2.7: use the 'cpu-add' monitor command

        for (my $i = $currentvcpus; $i < $vcpus; $i++) {
            mon_cmd($vmid, "cpu-add", id => int($i));
        }
    }
}
4480
# Apply I/O throttling limits to block device $deviceid of VM $vmid through
# the 'block_set_io_throttle' monitor command.
#
# After $vmid and $deviceid the limits are passed positionally in this order:
#   bps, bps_rd, bps_wr, iops, iops_rd, iops_wr,
#   bps_max, bps_rd_max, bps_wr_max, iops_max, iops_rd_max, iops_wr_max,
#   bps_max_length, bps_rd_max_length, bps_wr_max_length,
#   iops_max_length, iops_rd_max_length, iops_wr_max_length
# Every limit is converted with int() before it is sent.
# Does nothing when the VM is not running.
sub qemu_block_set_io_throttle {
    my ($vmid, $deviceid, @limits) = @_;

    return if !check_running($vmid);

    # monitor parameter names, in the same order as the positional arguments
    my @limit_names = qw(
        bps bps_rd bps_wr
        iops iops_rd iops_wr
        bps_max bps_rd_max bps_wr_max
        iops_max iops_rd_max iops_wr_max
        bps_max_length bps_rd_max_length bps_wr_max_length
        iops_max_length iops_rd_max_length iops_wr_max_length
    );

    my @throttle_args = map { ($limit_names[$_] => int($limits[$_])) } 0 .. $#limit_names;

    mon_cmd($vmid, "block_set_io_throttle", device => $deviceid, @throttle_args);
}
4512
# Resize volume $volid to $size via the storage layer and, when VM $vmid is
# running, tell QEMU about the new size of block device $deviceid.
#
# If PVE::Storage::volume_resize() returns a false value, size 0 is reported
# to QEMU. The size sent to the monitor is rounded up to a multiple of 1024.
sub qemu_block_resize {
    my ($vmid, $deviceid, $storecfg, $volid, $size) = @_;

    my $running = check_running($vmid);

    my $resized = PVE::Storage::volume_resize($storecfg, $volid, $size, $running);
    $size = 0 if !$resized;

    return if !$running;

    # pad up to the next multiple of 1024
    $size += (1024 - $size % 1024) % 1024;

    my @resize_args = (
        device => $deviceid,
        size => int($size),
        timeout => 60,
    );

    mon_cmd($vmid, "block_resize", @resize_args);
}
4533
# Create snapshot $snap of volume $volid.
#
# When VM $vmid is running and do_snapshots_with_qemu() says so, the snapshot
# is taken by QEMU ('blockdev-snapshot-internal-sync' on $deviceid); in every
# other case it is taken by the storage layer.
sub qemu_volume_snapshot {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $snapshot_via_qemu = check_running($vmid)
        && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    if (!$snapshot_via_qemu) {
        PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
    } else {
        mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap);
    }
}
4545
# Delete snapshot $snap of volume $volid.
#
# The volume only counts as "running" when VM $vmid is running and one of the
# volumes in its current configuration is $volid. In that case, and if
# do_snapshots_with_qemu() agrees, QEMU deletes the snapshot
# ('blockdev-snapshot-delete-internal-sync' on $deviceid); otherwise the
# storage layer does, receiving the running flag as last argument.
sub qemu_volume_snapshot_delete {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $running = check_running($vmid);

    if ($running) {
        # a running VM only matters if it actually uses this volume
        $running = undef;

        my $vm_conf = PVE::QemuConfig->load_config($vmid);
        my $mark_if_used = sub {
            my ($ds, $drive) = @_;
            $running = 1 if $drive->{file} eq $volid;
        };
        PVE::QemuConfig->foreach_volume($vm_conf, $mark_if_used);
    }

    my $delete_via_qemu = $running && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    if (!$delete_via_qemu) {
        PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $running);
    } else {
        mon_cmd($vmid, 'blockdev-snapshot-delete-internal-sync', device => $deviceid, name => $snap);
    }
}
4567
# Set the migration capabilities of VM $vmid.
#
# Every capability reported by 'query-migrate-capabilities' is switched on or
# off explicitly: 'auto-converge' and 'xbzrle' are enabled, 'dirty-bitmaps'
# is enabled when 'query-proxmox-support' reports the matching property
# ('pbs-dirty-bitmap-savevm' if $savevm is true, 'pbs-dirty-bitmap-migration'
# otherwise), and everything else is disabled. A failing
# 'query-proxmox-support' is ignored.
sub set_migration_caps {
    my ($vmid, $savevm) = @_;

    my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };

    my $bitmap_prop = $savevm ? 'pbs-dirty-bitmap-savevm' : 'pbs-dirty-bitmap-migration';

    my $wanted_state = {
        "auto-converge" => 1,
        "xbzrle" => 1,
        "x-rdma-pin-all" => 0,
        "zero-blocks" => 0,
        "compress" => 0,
        "dirty-bitmaps" => $qemu_support->{$bitmap_prop} ? 1 : 0,
    };

    my $supported = mon_cmd($vmid, "query-migrate-capabilities");

    my @capabilities = map {
        my $name = $_->{capability};
        +{
            capability => $name,
            state => $wanted_state->{$name} ? JSON::true : JSON::false,
        };
    } @{ $supported // [] };

    mon_cmd($vmid, "migrate-set-capabilities", capabilities => \@capabilities);
}
4598
# Call $func->($volid, $attributes, @param) once for every volume referenced
# by $conf or by one of its snapshots (including 'vmstate' and unused
# volumes).
#
# $attributes is a hash collected over all references to the volume:
#   cdrom                  - 1 unless some reference is not a CD-ROM drive
#   replicate              - 1 if some reference has replication enabled
#                            (a missing 'replicate' drive option counts as on)
#   shared                 - 1 if some reference is marked shared
#   referenced_in_config   - 1 if referenced by the current config
#   referenced_in_snapshot - hash of snapshot names referencing the volume
#   size                   - first non-zero size seen
#   is_vmstate, is_tpmstate, is_unused - 1 if referenced by such a key
#   drivename              - key of a reference that is a valid drive name
sub foreach_volid {
    my ($conf, $func, @param) = @_;

    my $volhash = {};

    my $collect = sub {
        my ($key, $drive, $snapname) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $info = $volhash->{$volid} //= {};

        $info->{cdrom} //= 1;
        $info->{cdrom} = 0 if !drive_is_cdrom($drive);

        my $replicate = $drive->{replicate} // 1;
        $info->{replicate} //= 0;
        $info->{replicate} = 1 if $replicate;

        $info->{shared} //= 0;
        $info->{shared} = 1 if $drive->{shared};

        $info->{referenced_in_config} //= 0;
        if (defined($snapname)) {
            $info->{referenced_in_snapshot}->{$snapname} = 1;
        } else {
            $info->{referenced_in_config} = 1;
        }

        my $size = $drive->{size};
        $info->{size} //= $size if $size;

        $info->{is_vmstate} //= 0;
        $info->{is_vmstate} = 1 if $key eq 'vmstate';

        $info->{is_tpmstate} //= 0;
        $info->{is_tpmstate} = 1 if $key eq 'tpmstate0';

        $info->{is_unused} //= 0;
        $info->{is_unused} = 1 if $key =~ /^unused\d+$/;

        $info->{drivename} = $key if is_valid_drivename($key);
    };

    my $include_opts = {
        extra_keys => ['vmstate'],
        include_unused => 1,
    };

    # current config first, then every snapshot
    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $collect);
    foreach my $snapname (keys %{$conf->{snapshots}}) {
        my $snap_conf = $conf->{snapshots}->{$snapname};
        PVE::QemuConfig->foreach_volume_full($snap_conf, $include_opts, $collect, $snapname);
    }

    foreach my $volid (keys %$volhash) {
        $func->($volid, $volhash->{$volid}, @param);
    }
}
4656
# Options that vmconfig_hotplug_pending() copies from the pending section
# into the active config right away, independent of any selection.
my $fast_plug_option = {
    map { $_ => 1 } qw(
        lock
        name
        onboot
        shares
        startup
        description
        protection
        vmstatestorage
        hookscript
        tags
    )
};
4669
# hotplug changes in [PENDING]
#
# Tries to apply the pending changes of $conf to the running VM $vmid.
#
# $selection hash can be used to only apply specified options, for
# example: { cores => 1 } (only apply changed 'cores')
# $errors ref is used to return error messages (keyed by option name)
#
# Options that cannot be hot-plugged are skipped and stay pending. The config
# is written back before returning.
sub vmconfig_hotplug_pending {
    my ($vmid, $conf, $storecfg, $selection, $errors) = @_;

    my $defaults = load_defaults();
    my $arch = get_vm_arch($conf);
    my $machine_type = get_vm_machine($conf, undef, $arch);

    # commit values which do not have any impact on running VM first
    # Note: those options cannot raise errors, so we do not care about
    # $selection and always apply them.

    my $add_error = sub {
        my ($opt, $msg) = @_;
        $errors->{$opt} = "hotplug problem - $msg";
    };

    my $changes = 0;
    foreach my $opt (keys %{$conf->{pending}}) { # add/change
        if ($fast_plug_option->{$opt}) {
            $conf->{$opt} = $conf->{pending}->{$opt};
            delete $conf->{pending}->{$opt};
            $changes = 1;
        }
    }

    if ($changes) {
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');

    # first pass: options that are pending deletion
    my $cgroup = PVE::QemuServer::CGroup->new($vmid);
    my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    foreach my $opt (sort keys %$pending_delete_hash) {
        next if $selection && !$selection->{$opt};
        my $force = $pending_delete_hash->{$opt}->{force};
        eval {
            if ($opt eq 'hotplug') {
                die "skip\n" if ($conf->{hotplug} =~ /memory/);
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                # deleting the option reverts to the default tablet setting;
                # tablet/keyboard have no device config, so an empty one is
                # passed to fill the $device slot of vm_deviceplug()
                if ($defaults->{tablet}) {
                    vm_deviceplug($storecfg, $conf, $vmid, 'tablet', {}, $arch, $machine_type);
                    vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', {}, $arch, $machine_type)
                        if $arch eq 'aarch64';
                } else {
                    vm_deviceunplug($vmid, $conf, 'tablet');
                    vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
                }
            } elsif ($opt =~ m/^usb\d+/) {
                die "skip\n";
                # since we cannot reliably hot unplug usb devices we are disabling it
                #die "skip\n" if !$hotplug_features->{usb} || $conf->{$opt} =~ m/spice/i;
                #vm_deviceunplug($vmid, $conf, $opt);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, undef);
            } elsif ($opt eq 'balloon') {
                # enable balloon device is not hotpluggable
                die "skip\n" if defined($conf->{balloon}) && $conf->{balloon} == 0;
                # here we reset the ballooning value to memory
                my $balloon = $conf->{memory} || $defaults->{memory};
                mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
            } elsif ($fast_plug_option->{$opt}) {
                # do nothing
            } elsif ($opt =~ m/^net(\d+)$/) {
                die "skip\n" if !$hotplug_features->{network};
                vm_deviceunplug($vmid, $conf, $opt);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if !$hotplug_features->{disk} || $opt =~ m/(ide|sata)(\d+)/;
                vm_deviceunplug($vmid, $conf, $opt);
                vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
            } elsif ($opt =~ m/^memory$/) {
                die "skip\n" if !$hotplug_features->{memory};
                PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt);
            } elsif ($opt eq 'cpuunits') {
                $cgroup->change_cpu_shares(undef, 1024);
            } elsif ($opt eq 'cpulimit') {
                $cgroup->change_cpu_quota(undef, undef); # reset, cgroup module can better decide values
            } else {
                die "skip\n";
            }
        };
        if (my $err = $@) {
            # "skip" marks options that simply stay pending, not a failure
            $add_error->($opt, $err) if $err ne "skip\n";
        } else {
            delete $conf->{$opt};
            PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        }
    }

    # Moves all pending cloud-init options into the active config (once per
    # call of vmconfig_hotplug_pending) and regenerates the cloud-init config
    # with drive $key set to $value.
    my ($apply_pending_cloudinit, $apply_pending_cloudinit_done);
    $apply_pending_cloudinit = sub {
        return if $apply_pending_cloudinit_done; # once is enough
        $apply_pending_cloudinit_done = 1; # once is enough

        my ($key, $value) = @_;

        my @cloudinit_opts = keys %$confdesc_cloudinit;
        foreach my $opt (keys %{$conf->{pending}}) {
            next if !grep { $_ eq $opt } @cloudinit_opts;
            $conf->{$opt} = delete $conf->{pending}->{$opt};
        }

        my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
        foreach my $opt (sort keys %$pending_delete_hash) {
            next if !grep { $_ eq $opt } @cloudinit_opts;
            PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
            delete $conf->{$opt};
        }

        my $new_conf = { %$conf };
        $new_conf->{$key} = $value;
        PVE::QemuServer::Cloudinit::generate_cloudinitconfig($new_conf, $vmid);
    };

    # second pass: options that are added or changed
    foreach my $opt (keys %{$conf->{pending}}) {
        next if $selection && !$selection->{$opt};
        my $value = $conf->{pending}->{$opt};
        eval {
            if ($opt eq 'hotplug') {
                die "skip\n" if ($value =~ /memory/) || ($value !~ /memory/ && $conf->{hotplug} =~ /memory/);
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                if ($value == 1) {
                    # empty device config fills the $device slot of vm_deviceplug()
                    vm_deviceplug($storecfg, $conf, $vmid, 'tablet', {}, $arch, $machine_type);
                    vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', {}, $arch, $machine_type)
                        if $arch eq 'aarch64';
                } elsif ($value == 0) {
                    vm_deviceunplug($vmid, $conf, 'tablet');
                    vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
                }
            } elsif ($opt =~ m/^usb\d+$/) {
                die "skip\n";
                # since we cannot reliably hot unplug usb devices we disable it for now
                #die "skip\n" if !$hotplug_features->{usb} || $value =~ m/spice/i;
                #my $d = eval { parse_property_string($usbdesc->{format}, $value) };
                #die "skip\n" if !$d;
                #qemu_usb_hotplug($storecfg, $conf, $vmid, $opt, $d, $arch, $machine_type);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, $value);
            } elsif ($opt eq 'balloon') {
                # enable/disable ballooning device is not hotpluggable
                my $old_balloon_enabled = !!(!defined($conf->{balloon}) || $conf->{balloon});
                my $new_balloon_enabled = !!(!defined($conf->{pending}->{balloon}) || $conf->{pending}->{balloon});
                die "skip\n" if $old_balloon_enabled != $new_balloon_enabled;

                # allow manual ballooning if shares is set to zero
                if ((defined($conf->{shares}) && ($conf->{shares} == 0))) {
                    my $balloon = $conf->{pending}->{balloon} || $conf->{memory} || $defaults->{memory};
                    mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
                }
            } elsif ($opt =~ m/^net(\d+)$/) {
                # some changes can be done without hotplug
                vmconfig_update_net($storecfg, $conf, $hotplug_features->{network},
                    $vmid, $opt, $value, $arch, $machine_type);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if $opt eq 'efidisk0' || $opt eq 'tpmstate0';
                # some changes can be done without hotplug
                my $drive = parse_drive($opt, $value);
                if (drive_is_cloudinit($drive)) {
                    $apply_pending_cloudinit->($opt, $value);
                }
                vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
                    $vmid, $opt, $value, $arch, $machine_type);
            } elsif ($opt =~ m/^memory$/) { #dimms
                die "skip\n" if !$hotplug_features->{memory};
                $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt, $value);
            } elsif ($opt eq 'cpuunits') {
                $cgroup->change_cpu_shares($conf->{pending}->{$opt}, 1024);
            } elsif ($opt eq 'cpulimit') {
                my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000);
                $cgroup->change_cpu_quota($cpulimit, 100000);
            } else {
                die "skip\n"; # skip non-hot-pluggable options
            }
        };
        if (my $err = $@) {
            $add_error->($opt, $err) if $err ne "skip\n";
        } else {
            $conf->{$opt} = $value;
            delete $conf->{pending}->{$opt};
        }
    }

    PVE::QemuConfig->write_config($vmid, $conf);
}
4862
# Free the volume behind drive $drive (config key $key) if that is allowed.
#
# Only acts when $force is set or $key is an "unused" entry, and the drive is
# not a CD-ROM. Returns 1 when the volume was freed or when the VM is not the
# owner of the volume (the caller may then just drop the config entry);
# returns nothing when no action was taken. Dies when the volume is still in
# use; the permission check on the storage is done through $rpcenv->check().
sub try_deallocate_drive {
    my ($storecfg, $vmid, $conf, $key, $drive, $rpcenv, $authuser, $force) = @_;

    my $may_deallocate = $force || $key =~ /^unused/;
    return if !$may_deallocate || drive_is_cdrom($drive, 1);

    my $volid = $drive->{file};

    # If vm is not owner of this disk remove from config
    return 1 if !vm_is_volid_owner($storecfg, $vmid, $volid);

    my $sid = PVE::Storage::parse_volume_id($volid);
    $rpcenv->check($authuser, "/storage/$sid", ['Datastore.AllocateSpace']);

    # check if the disk is really unused
    if (PVE::QemuServer::Drive::is_volume_in_use($storecfg, $conf, $key, $volid)) {
        die "unable to delete '$volid' - volume is still in use (snapshot?)\n";
    }

    PVE::Storage::vdisk_free($storecfg, $volid);

    return 1;
}
4885
# Handle removal of drive option $opt from $conf: with $force the volume is
# deallocated (after checking the 'VM.Config.Disk' permission of the current
# user), without it the drive is registered as unused.
sub vmconfig_delete_or_detach_drive {
    my ($vmid, $storecfg, $conf, $opt, $force) = @_;

    my $drive = parse_drive($opt, $conf->{$opt});

    my $rpcenv = PVE::RPCEnvironment::get();
    my $authuser = $rpcenv->get_user();

    # detach only: keep the volume around as an unused disk
    return vmconfig_register_unused_drive($storecfg, $vmid, $conf, $drive) if !$force;

    $rpcenv->check_vm_perm($authuser, $vmid, undef, ['VM.Config.Disk']);
    try_deallocate_drive($storecfg, $vmid, $conf, $opt, $drive, $rpcenv, $authuser, $force);
}
4901
4902
4903
# Apply all pending changes of $conf to the active configuration of VM $vmid
# (cold plug) and write the config once at the end.
#
# Pending deletions are processed first: drives are deleted or detached, then
# the option is removed. Afterwards every pending value replaces the current
# one; a drive that gets replaced is registered as unused first.
# Failures are stored in $errors (keyed by option name) and warned about; the
# affected option then stays pending.
sub vmconfig_apply_pending {
    my ($vmid, $conf, $storecfg, $errors) = @_;

    my $report_failure = sub {
        my ($opt, $msg) = @_;
        my $text = "unable to apply pending change $opt : $msg";
        $errors->{$opt} = $text;
        warn $text;
    };

    # cold plug

    my $to_delete = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    for my $opt (sort keys %$to_delete) {
        my $force = $to_delete->{$opt}->{force};
        eval {
            die "internal error" if $opt =~ m/^unused/;
            vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force)
                if defined($conf->{$opt}) && is_valid_drivename($opt);
        };
        if (my $err = $@) {
            $report_failure->($opt, $err);
            next;
        }
        PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        delete $conf->{$opt};
    }

    PVE::QemuConfig->cleanup_pending($conf);

    for my $opt (keys %{$conf->{pending}}) { # add/change
        next if $opt eq 'delete'; # just to be sure
        eval {
            # the drive currently configured under this key becomes unused
            vmconfig_register_unused_drive($storecfg, $vmid, $conf, parse_drive($opt, $conf->{$opt}))
                if defined($conf->{$opt}) && is_valid_drivename($opt);
        };
        if (my $err = $@) {
            $report_failure->($opt, $err);
            next;
        }
        $conf->{$opt} = delete $conf->{pending}->{$opt};
    }

    # write all changes at once to avoid unnecessary i/o
    PVE::QemuConfig->write_config($vmid, $conf);
}
4953
# Apply the new value $value of network option $opt (net<N>) to the running
# VM $vmid.
#
# If the option already exists and only bridge, VLAN tag, trunks, firewall,
# rate or link state differ, the change is applied to the existing tap device
# / NIC and 1 is returned. If model, MAC address, queues or the bridge/NAT
# mode differ, the NIC is hot-unplugged and plugged again with the new
# settings. A new NIC is hot-plugged.
#
# Dies with "skip\n" when a (re)plug would be needed but $hotplug is false.
sub vmconfig_update_net {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $newnet = parse_net($value);

    if ($conf->{$opt}) {
        my $oldnet = parse_net($conf->{$opt});

        if (safe_string_ne($oldnet->{model}, $newnet->{model}) ||
            safe_string_ne($oldnet->{macaddr}, $newnet->{macaddr}) ||
            safe_num_ne($oldnet->{queues}, $newnet->{queues}) ||
            !($newnet->{bridge} && $oldnet->{bridge})) { # bridge/nat mode change

            # for non online change, we try to hot-unplug
            # (the new device is plugged at the end of this sub)
            die "skip\n" if !$hotplug;
            vm_deviceunplug($vmid, $conf, $opt);
        } else {

            die "internal error" if $opt !~ m/net(\d+)/;
            my $iface = "tap${vmid}i$1";

            if (safe_string_ne($oldnet->{bridge}, $newnet->{bridge}) ||
                safe_num_ne($oldnet->{tag}, $newnet->{tag}) ||
                safe_string_ne($oldnet->{trunks}, $newnet->{trunks}) ||
                safe_num_ne($oldnet->{firewall}, $newnet->{firewall})) {
                # re-attach the tap device with the new bridge settings
                PVE::Network::tap_unplug($iface);

                if ($have_sdn) {
                    PVE::Network::SDN::Zones::tap_plug($iface, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks}, $newnet->{rate});
                } else {
                    PVE::Network::tap_plug($iface, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks}, $newnet->{rate});
                }
            } elsif (safe_num_ne($oldnet->{rate}, $newnet->{rate})) {
                # Rate can be applied on its own but any change above needs to
                # include the rate in tap_plug since OVS resets everything.
                PVE::Network::tap_rate_limit($iface, $newnet->{rate});
            }

            if (safe_string_ne($oldnet->{link_down}, $newnet->{link_down})) {
                qemu_set_link_status($vmid, $opt, !$newnet->{link_down});
            }

            return 1;
        }
    }

    if ($hotplug) {
        vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet, $arch, $machine_type);
    } else {
        die "skip\n";
    }
}
5006
# Apply a changed drive option ($value) to a running VM.
#
# For an existing drive:
#   - changing the media type is refused,
#   - a CD-ROM gets its medium ejected and (unless the new file is 'none')
#     replaced, returning 1,
#   - a disk whose volume is unchanged only gets its I/O throttle limits
#     updated, returning 1 (dies with "skip\n" if a setting that cannot be
#     changed online differs),
#   - a disk whose volume changed is hot-unplugged, registered as unused and
#     then the new one is hot-plugged.
# New drives are hot-plugged, except for ide/sata which die with "skip\n".
sub vmconfig_update_disk {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $drive = parse_drive($opt, $value);

    if ($conf->{$opt} && (my $old_drive = parse_drive($opt, $conf->{$opt}))) {
        my $media = $drive->{media} || 'disk';
        my $oldmedia = $old_drive->{media} || 'disk';
        die "unable to change media type\n" if $media ne $oldmedia;

        if (drive_is_cdrom($old_drive)) {
            if ($drive->{file} eq 'none') {
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive)
                    if drive_is_cloudinit($old_drive);
            } else {
                my $path = get_iso_path($storecfg, $vmid, $drive->{file});

                # force eject if locked
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");

                mon_cmd($vmid, "blockdev-change-medium", id => "$opt", filename => "$path")
                    if $path;
            }

            return 1;
        }

        if ($drive->{file} eq $old_drive->{file}) {
            # same volume: update the existing disk in place

            # these settings are not hotpluggable
            for my $key (qw(discard iothread queues cache ssd)) {
                die "skip\n" if safe_string_ne($drive->{$key}, $old_drive->{$key});
            }

            my @throttle_keys = qw(
                mbps mbps_rd mbps_wr
                iops iops_rd iops_wr
                mbps_max mbps_rd_max mbps_wr_max
                iops_max iops_rd_max iops_wr_max
                bps_max_length bps_rd_max_length bps_wr_max_length
                iops_max_length iops_rd_max_length iops_wr_max_length
            );

            my $throttle_changed = 0;
            for my $key (@throttle_keys) {
                next if !safe_num_ne($drive->{$key}, $old_drive->{$key});
                $throttle_changed = 1;
                last;
            }

            if ($throttle_changed) {
                # mbps* values are configured in MiB/s and passed on in bytes/s
                qemu_block_set_io_throttle(
                    $vmid, "drive-$opt",
                    (map { ($drive->{$_} || 0) * 1024 * 1024 } qw(mbps mbps_rd mbps_wr)),
                    (map { $drive->{$_} || 0 } qw(iops iops_rd iops_wr)),
                    (map { ($drive->{$_} || 0) * 1024 * 1024 } qw(mbps_max mbps_rd_max mbps_wr_max)),
                    (map { $drive->{$_} || 0 } qw(iops_max iops_rd_max iops_wr_max)),
                    (map { $drive->{$_} || 1 } qw(
                        bps_max_length bps_rd_max_length bps_wr_max_length
                        iops_max_length iops_rd_max_length iops_wr_max_length
                    )),
                );
            }

            return 1;
        }

        # different volume: unplug the old disk and register it as unused
        die "skip\n" if !$hotplug;

        vm_deviceunplug($vmid, $conf, $opt);
        vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
    }

    die "skip\n" if !$hotplug || $opt =~ m/(ide|sata)(\d+)/;

    # hotplug new disks
    PVE::Storage::activate_volumes($storecfg, [$drive->{file}])
        if $drive->{file} !~ m|^/dev/.+|;
    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive, $arch, $machine_type);
}
5114
# called in locked context by incoming migration
#
# Collects all non-CD-ROM volumes of $conf that live on non-shared storage.
# Returns a hashref mapping drive key to
#   [$volid, $storeid, $volname, $drive, $use_existing]
# where $use_existing is 1 if the volid is listed in $replicated_volumes.
sub vm_migrate_get_nbd_disks {
    my ($storecfg, $conf, $replicated_volumes) = @_;

    my %local_volumes;

    my $collect = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid);

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        return if $scfg->{shared};

        # replicated disks re-use existing state via bitmap
        my $use_existing = $replicated_volumes->{$volid} ? 1 : 0;
        $local_volumes{$ds} = [$volid, $storeid, $volname, $drive, $use_existing];
    };

    PVE::QemuConfig->foreach_volume($conf, $collect);

    return \%local_volumes;
}
5140
# called in locked context by incoming migration
#
# Takes the structure produced by vm_migrate_get_nbd_disks and, for every
# volume that is not re-used, allocates a new target volume. Returns a hashref
# mapping drive key to { drivestr, volid } (plus replicated => 1 for re-used
# volumes).
#
# NOTE: the $drive hashes inside $source_volumes are modified in place (their
# 'format' and 'file' are overwritten with the newly allocated volume).
sub vm_migrate_alloc_nbd_disks {
    my ($storecfg, $vmid, $source_volumes, $storagemap) = @_;

    my $nbd = {};

    for my $opt (sort keys %$source_volumes) {
        my ($volid, $storeid, $volname, $drive, $use_existing) = $source_volumes->{$opt}->@*;

        if ($use_existing) {
            $nbd->{$opt} = {
                drivestr => print_drive($drive),
                volid => $volid,
                replicated => 1,
            };
            next;
        }

        # With an identity mapping, keep the format of the original volume.
        # Otherwise map to the target storage and fall back to its default
        # format if the original format is not available there.
        my $format;
        if ($storagemap->{identity}) {
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $format = qemu_img_format($scfg, $volname);
        } else {
            $storeid = map_storage($storagemap, $storeid);
            my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            my $fileFormat = qemu_img_format($scfg, $volname);
            my $is_valid = grep { $fileFormat eq $_ } @{$validFormats};
            $format = $is_valid ? $fileFormat : $defFormat;
        }

        my $size = $drive->{size} / 1024;
        my $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $format, undef, $size);

        # intentionally the same hash as in $source_volumes, not a copy
        $drive->{format} = $format;
        $drive->{file} = $newvolid;

        $nbd->{$opt}->{drivestr} = print_drive($drive);
        $nbd->{$opt}->{volid} = $newvolid;
    }

    return $nbd;
}
5184
# Locked entry point for starting a VM.
#
# see vm_start_nolock for parameters, additionally:
# migrate_opts:
#   storagemap = parsed storage map for allocating NBD disks
#
# If the VM holds a 'backup' lock and is already running, it is resumed in the
# existing QEMU instance instead of being started again.
sub vm_start {
    my ($storecfg, $vmid, $params, $migrate_opts) = @_;

    my $start_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid, $migrate_opts->{migratedfrom});

        if (!$params->{skiptemplate} && PVE::QemuConfig->is_template($conf)) {
            die "you can't start a vm if it's a template\n";
        }

        my $has_suspended_lock = PVE::QemuConfig->has_lock($conf, 'suspended');
        my $has_backup_lock = PVE::QemuConfig->has_lock($conf, 'backup');

        my $running = check_running($vmid, undef, $migrate_opts->{migratedfrom});

        # a backup is currently running, attempt to start the guest in the
        # existing QEMU instance
        return vm_resume($vmid) if $has_backup_lock && $running;

        PVE::QemuConfig->check_lock($conf)
            if !$params->{skiplock} && !$has_suspended_lock;

        $params->{resume} = $has_suspended_lock || defined($conf->{vmstate});

        die "VM $vmid already running\n" if $running;

        if (my $storagemap = $migrate_opts->{storagemap}) {
            my $replicated = $migrate_opts->{replicated_volumes};
            my $disks = vm_migrate_get_nbd_disks($storecfg, $conf, $replicated);
            $migrate_opts->{nbd} = vm_migrate_alloc_nbd_disks($storecfg, $vmid, $disks, $storagemap);

            # point the in-memory config at the freshly prepared target volumes
            for my $opt (keys %{$migrate_opts->{nbd}}) {
                $conf->{$opt} = $migrate_opts->{nbd}->{$opt}->{drivestr};
            }
        }

        return vm_start_nolock($storecfg, $vmid, $conf, $params, $migrate_opts);
    };

    return PVE::QemuConfig->lock_config($vmid, $start_locked);
}

5228
5229
# Start a VM without taking the config lock.
#
# Builds the QEMU command line from $conf, reserves and prepares host PCI
# devices, activates the volumes and runs QEMU in a forked child inside a
# systemd scope. Afterwards sets up migration/NBD/spice/balloon state via the
# monitor.
#
# Returns a hashref with 'migrate_uri' and, where applicable,
# 'migrate_storage_uri', 'drives' and 'spice_port'.
# Dies with "start failed: ..." if launching QEMU fails.
#
# params:
#   statefile => 'tcp', 'unix' for migration or path/volid for RAM state
#   skiplock => 0/1, skip checking for config lock
#   skiptemplate => 0/1, skip checking whether VM is template
#   forcemachine => to force Qemu machine (rollback/migration)
#   forcecpu => a QEMU '-cpu' argument string to override get_cpu_options
#   timeout => in seconds
#   paused => start VM in paused state (backup)
#   resume => resume from hibernation
#   pbs-backing => {
#       sata0 => {
#           repository
#           snapshot
#           keyfile
#           archive
#       },
#       virtio2 => ...
#   }
# migrate_opts:
#   nbd => volumes for NBD exports (vm_migrate_alloc_nbd_disks)
#   migratedfrom => source node
#   spice_ticket => used for spice migration, passed via tunnel/stdin
#   network => CIDR of migration network
#   type => secure/insecure - tunnel over encrypted connection or plain-text
#   nbd_proto_version => int, 0 for TCP, 1 for UNIX
#   replicated_volumes = which volids should be re-used with bitmaps for nbd migration
sub vm_start_nolock {
    my ($storecfg, $vmid, $conf, $params, $migrate_opts) = @_;

    my $statefile = $params->{statefile};
    my $resume = $params->{resume};

    my $migratedfrom = $migrate_opts->{migratedfrom};
    my $migration_type = $migrate_opts->{type};

    my $res = {};

    # clean up leftover reboot request files
    eval { clear_reboot_request($vmid); };
    warn $@ if $@;

    # pending changes are only applied for a start without a statefile
    if (!$statefile && scalar(keys %{$conf->{pending}})) {
        vmconfig_apply_pending($vmid, $conf, $storecfg);
        $conf = PVE::QemuConfig->load_config($vmid); # update/reload
    }

    # don't regenerate the ISO if the VM is started as part of a live migration
    # this way we can reuse the old ISO with the correct config
    PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid) if !$migratedfrom;

    my $defaults = load_defaults();

    # set environment variable useful inside network script
    $ENV{PVE_MIGRATED_FROM} = $migratedfrom if $migratedfrom;

    PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1);

    my $forcemachine = $params->{forcemachine};
    my $forcecpu = $params->{forcecpu};
    if ($resume) {
        # enforce machine and CPU type on suspended vm to ensure HW compatibility
        $forcemachine = $conf->{runningmachine};
        $forcecpu = $conf->{runningcpu};
        print "Resuming suspended VM\n";
    }

    my ($cmd, $vollist, $spice_port) = config_to_command($storecfg, $vmid,
        $conf, $defaults, $forcemachine, $forcecpu, $params->{'pbs-backing'});

    # Lazily determine (and cache) the local IP to listen on for migration
    # traffic: the single local address inside the migration network if one is
    # configured, otherwise the address returned by remote_node_ip().
    my $migration_ip;
    my $get_migration_ip = sub {
        my ($nodename) = @_;

        return $migration_ip if defined($migration_ip);

        my $cidr = $migrate_opts->{network};

        # fall back to the datacenter-wide migration network
        if (!defined($cidr)) {
            my $dc_conf = PVE::Cluster::cfs_read_file('datacenter.cfg');
            $cidr = $dc_conf->{migration}->{network};
        }

        if (defined($cidr)) {
            my $ips = PVE::Network::get_local_ip_from_cidr($cidr);

            die "could not get IP: no address configured on local " .
                "node for network '$cidr'\n" if scalar(@$ips) == 0;

            die "could not get IP: multiple addresses configured on local " .
                "node for network '$cidr'\n" if scalar(@$ips) > 1;

            $migration_ip = @$ips[0];
        }

        $migration_ip = PVE::Cluster::remote_node_ip($nodename, 1)
            if !defined($migration_ip);

        return $migration_ip;
    };

    # extend the command line depending on how the VM state comes in
    my $migrate_uri;
    if ($statefile) {
        if ($statefile eq 'tcp') {
            my $localip = "localhost";
            my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg');
            my $nodename = nodename();

            # explicit type wins, then datacenter.cfg, then 'secure'
            if (!defined($migration_type)) {
                if (defined($datacenterconf->{migration}->{type})) {
                    $migration_type = $datacenterconf->{migration}->{type};
                } else {
                    $migration_type = 'secure';
                }
            }

            if ($migration_type eq 'insecure') {
                $localip = $get_migration_ip->($nodename);
                $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
            }

            my $pfamily = PVE::Tools::get_host_address_family($nodename);
            my $migrate_port = PVE::Tools::next_migrate_port($pfamily);
            $migrate_uri = "tcp:${localip}:${migrate_port}";
            push @$cmd, '-incoming', $migrate_uri;
            push @$cmd, '-S';

        } elsif ($statefile eq 'unix') {
            # should be default for secure migrations as a ssh TCP forward
            # tunnel is not reliably ready in time and regularly fails to set
            # up, so use UNIX socket forwards
            my $socket_addr = "/run/qemu-server/$vmid.migrate";
            unlink $socket_addr;

            $migrate_uri = "unix:$socket_addr";

            push @$cmd, '-incoming', $migrate_uri;
            push @$cmd, '-S';

        } elsif (-e $statefile) {
            # statefile is an existing path
            push @$cmd, '-loadstate', $statefile;
        } else {
            # otherwise treat statefile as a volume ID
            my $statepath = PVE::Storage::path($storecfg, $statefile);
            push @$vollist, $statefile;
            push @$cmd, '-loadstate', $statepath;
        }
    } elsif ($params->{paused}) {
        push @$cmd, '-S';
    }

    my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume);

    my $pci_devices = {}; # host pci devices
    for (my $i = 0; $i < $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES; $i++) {
        my $dev = $conf->{"hostpci$i"} or next;
        $pci_devices->{$i} = parse_hostpci($dev);
    }

    my $pci_id_list = [ map { $_->{id} } map { $_->{pciid}->@* } values $pci_devices->%* ];
    # reserve all PCI IDs before actually doing anything with them
    PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, $start_timeout);

    eval {
        for my $id (sort keys %$pci_devices) {
            my $d = $pci_devices->{$id};
            for my $dev ($d->{pciid}->@*) {
                PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $id, $d->{mdev});
            }
        }
    };
    if (my $err = $@) {
        # drop the reservation again, but report the original error
        eval { PVE::QemuServer::PCI::remove_pci_reservation($pci_id_list) };
        warn $@ if $@;
        die $err;
    }

    PVE::Storage::activate_volumes($storecfg, $vollist);

    # stop a possibly left-over scope of the same name; errors are ignored
    eval {
        run_command(['/bin/systemctl', 'stop', "$vmid.scope"], outfunc => sub{}, errfunc => sub{});
    };
    # Issues with the above 'stop' not being fully completed are extremely rare, a very low
    # timeout should be more than enough here...
    PVE::Systemd::wait_for_unit_removed("$vmid.scope", 5);

    my $cpuunits = get_cpuunits($conf);

    my %run_params = (
        timeout => $statefile ? undef : $start_timeout,
        umask => 0077,
        noerr => 1,
    );

    # when migrating, prefix QEMU output so other side can pick up any
    # errors that might occur and show the user
    if ($migratedfrom) {
        $run_params{quiet} = 1;
        $run_params{logfunc} = sub { print "QEMU: $_[0]\n" };
    }

    my %systemd_properties = (
        Slice => 'qemu.slice',
        KillMode => 'process',
        SendSIGKILL => 0,
        TimeoutStopUSec => ULONG_MAX, # infinity
    );

    if (PVE::CGroup::cgroup_mode() == 2) {
        $cpuunits = 10000 if $cpuunits >= 10000; # else we get an error
        $systemd_properties{CPUWeight} = $cpuunits;
    } else {
        $systemd_properties{CPUShares} = $cpuunits;
    }

    if (my $cpulimit = $conf->{cpulimit}) {
        $systemd_properties{CPUQuota} = int($cpulimit * 100);
    }
    $systemd_properties{timeout} = 10 if $statefile; # setting up the scope should be quick

    # Runs in a forked child: enter the systemd scope, start swtpm if a TPM
    # state is configured, then run QEMU.
    my $run_qemu = sub {
        PVE::Tools::run_fork sub {
            PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);

            my $tpmpid;
            if (my $tpm = $conf->{tpmstate0}) {
                # start the TPM emulator so QEMU can connect on start
                $tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom);
            }

            my $exitcode = run_command($cmd, %run_params);
            if ($exitcode) {
                if ($tpmpid) {
                    warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n";
                    kill 'TERM', $tpmpid;
                }
                die "QEMU exited with code $exitcode\n";
            }
        };
    };

    if ($conf->{hugepages}) {

        my $code = sub {
            my $hugepages_topology = PVE::QemuServer::Memory::hugepages_topology($conf);
            my $hugepages_host_topology = PVE::QemuServer::Memory::hugepages_host_topology();

            PVE::QemuServer::Memory::hugepages_mount();
            PVE::QemuServer::Memory::hugepages_allocate($hugepages_topology, $hugepages_host_topology);

            eval { $run_qemu->() };
            if (my $err = $@) {
                PVE::QemuServer::Memory::hugepages_reset($hugepages_host_topology)
                    if !$conf->{keephugepages};
                die $err;
            }

            PVE::QemuServer::Memory::hugepages_pre_deallocate($hugepages_topology)
                if !$conf->{keephugepages};
        };
        eval { PVE::QemuServer::Memory::hugepages_update_locked($code); };

    } else {
        eval { $run_qemu->() };
    }

    # $@ stems from whichever eval of the two branches above was executed
    if (my $err = $@) {
        # deactivate volumes if start fails
        eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
        eval { PVE::QemuServer::PCI::remove_pci_reservation($pci_id_list) };

        die "start failed: $err";
    }

    # re-reserve all PCI IDs now that we can know the actual VM PID
    my $pid = PVE::QemuServer::Helpers::vm_running_locally($vmid);
    eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, undef, $pid) };
    warn $@ if $@;

    print "migration listens on $migrate_uri\n" if $migrate_uri;
    $res->{migrate_uri} = $migrate_uri;

    # state was loaded from a file/volume (not an incoming migration): continue
    if ($statefile && $statefile ne 'tcp' && $statefile ne 'unix') {
        eval { mon_cmd($vmid, "cont"); };
        warn $@ if $@;
    }

    #start nbd server for storage migration
    if (my $nbd = $migrate_opts->{nbd}) {
        my $nbd_protocol_version = $migrate_opts->{nbd_proto_version} // 0;

        my $migrate_storage_uri;
        # nbd_protocol_version > 0 for unix socket support
        if ($nbd_protocol_version > 0 && $migration_type eq 'secure') {
            my $socket_path = "/run/qemu-server/$vmid\_nbd.migrate";
            mon_cmd($vmid, "nbd-server-start", addr => { type => 'unix', data => { path => $socket_path } } );
            $migrate_storage_uri = "nbd:unix:$socket_path";
        } else {
            my $nodename = nodename();
            my $localip = $get_migration_ip->($nodename);
            my $pfamily = PVE::Tools::get_host_address_family($nodename);
            my $storage_migrate_port = PVE::Tools::next_migrate_port($pfamily);

            mon_cmd($vmid, "nbd-server-start", addr => {
                type => 'inet',
                data => {
                    host => "${localip}",
                    port => "${storage_migrate_port}",
                },
            });
            # brackets are only added for the URI, not for the monitor command above
            $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
            $migrate_storage_uri = "nbd:${localip}:${storage_migrate_port}";
        }

        $res->{migrate_storage_uri} = $migrate_storage_uri;

        # export every prepared drive and report it on stdout and in $res
        foreach my $opt (sort keys %$nbd) {
            my $drivestr = $nbd->{$opt}->{drivestr};
            my $volid = $nbd->{$opt}->{volid};
            mon_cmd($vmid, "nbd-server-add", device => "drive-$opt", writable => JSON::true );
            my $nbd_uri = "$migrate_storage_uri:exportname=drive-$opt";
            print "storage migration listens on $nbd_uri volume:$drivestr\n";
            print "re-using replicated volume: $opt - $volid\n"
                if $nbd->{$opt}->{replicated};

            $res->{drives}->{$opt} = $nbd->{$opt};
            $res->{drives}->{$opt}->{nbd_uri} = $nbd_uri;
        }
    }

    if ($migratedfrom) {
        eval {
            set_migration_caps($vmid);
        };
        warn $@ if $@;

        if ($spice_port) {
            print "spice listens on port $spice_port\n";
            $res->{spice_port} = $spice_port;
            if ($migrate_opts->{spice_ticket}) {
                mon_cmd($vmid, "set_password", protocol => 'spice', password =>
                    $migrate_opts->{spice_ticket});
                mon_cmd($vmid, "expire_password", protocol => 'spice', time => "+30");
            }
        }

    } else {
        # balloon is configured in MiB, the monitor command gets bytes
        mon_cmd($vmid, "balloon", value => $conf->{balloon}*1024*1024)
            if !$statefile && $conf->{balloon};

        foreach my $opt (keys %$conf) {
            next if $opt !~ m/^net\d+$/;
            my $nicconf = parse_net($conf->{$opt});
            qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down};
        }
    }

    # skipped only if balloon is explicitly set to a false value (e.g. 0)
    mon_cmd($vmid, 'qom-set',
        path => "machine/peripheral/balloon0",
        property => "guest-stats-polling-interval",
        value => 2) if (!defined($conf->{balloon}) || $conf->{balloon});

    if ($resume) {
        print "Resumed VM, removing state\n";
        if (my $vmstate = $conf->{vmstate}) {
            PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
            PVE::Storage::vdisk_free($storecfg, $vmstate);
        }
        # drop the lock and all suspend-related keys from the config
        delete $conf->@{qw(lock vmstate runningmachine runningcpu)};
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-start');

    return $res;
}
5604
# Returns the QEMU command line for $vmid as a single string.
#
# With $snapname, the command line is generated from that snapshot's config,
# honoring the machine/CPU type recorded in the snapshot. Dies if the snapshot
# does not exist.
sub vm_commandline {
    my ($storecfg, $vmid, $snapname) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);
    my ($forcemachine, $forcecpu);

    if ($snapname) {
        my $snapshot = $conf->{snapshots}->{$snapname};
        die "snapshot '$snapname' does not exist\n" if !defined($snapshot);

        # check for machine or CPU overrides in snapshot
        $forcemachine = $snapshot->{runningmachine};
        $forcecpu = $snapshot->{runningcpu};

        # keep file digest for API
        $snapshot->{digest} = $conf->{digest};

        $conf = $snapshot;
    }

    my $defaults = load_defaults();

    # scalar assignment is kept on purpose (same call context as before)
    my $cmd = config_to_command(
        $storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu);

    return PVE::Tools::cmd2string($cmd);
}
5632
# Sends 'system_reset' to the VM's monitor while holding the config lock.
# The config lock check is skipped if $skiplock is set.
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $reset_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid);

        if (!$skiplock) {
            PVE::QemuConfig->check_lock($conf);
        }

        mon_cmd($vmid, "system_reset");
    };

    PVE::QemuConfig->lock_config($vmid, $reset_locked);
}
5645
# Returns an arrayref of all volume IDs referenced by $conf, skipping absolute
# paths and anything that does not parse as a storage volume ID.
sub get_vm_volumes {
    my ($conf) = @_;

    my @volids;

    my $collect = sub {
        my ($volid, $attr) = @_;

        # absolute paths are not storage-managed volumes
        return if $volid =~ m|^/|;

        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        push @volids, $volid;
    };

    foreach_volid($conf, $collect);

    return \@volids;
}
5663
# Best-effort cleanup after a VM was stopped: deactivate volumes (unless
# $keepActive), remove runtime files, the ivshmem file, mdev devices and PCI
# reservations, and optionally apply pending config changes.
#
# Errors are never propagated - they are only printed as warnings.
sub vm_stop_cleanup {
    my ($storecfg, $vmid, $conf, $keepActive, $apply_pending_changes) = @_;

    eval {
        if (!$keepActive) {
            my $vollist = get_vm_volumes($conf);
            PVE::Storage::deactivate_volumes($storecfg, $vollist);

            if (my $tpmdrive = $conf->{tpmstate0}) {
                my $tpm = parse_drive("tpmstate0", $tpmdrive);
                my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
                PVE::Storage::unmap_volume($storecfg, $tpm->{file}) if $storeid;
            }
        }

        # remove the per-VM runtime files
        for my $ext (qw(mon qmp pid vnc qga)) {
            unlink "/var/run/qemu-server/${vmid}.$ext";
        }

        if ($conf->{ivshmem}) {
            my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
            # Just delete it for now: VMs which already have it open are not
            # affected, but new VMs will get a separate one. If this becomes
            # an issue, either add some sort of ref-counting or a "don't
            # delete on stop" flag to the ivshmem format.
            my $shm_name = $ivshmem->{name} // $vmid;
            unlink '/dev/shm/pve-shm-' . $shm_name;
        }

        my $ids = [];
        for my $key (keys %$conf) {
            my ($hostpciindex) = $key =~ m/^hostpci(\d+)$/ or next;

            my $d = parse_hostpci($conf->{$key});
            my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $hostpciindex);

            for my $pci (@{$d->{pciid}}) {
                my $pciid = $pci->{id};
                push @$ids, $pciid;
                PVE::SysFSTools::pci_cleanup_mdev_device($pciid, $uuid);
            }
        }
        PVE::QemuServer::PCI::remove_pci_reservation($ids);

        if ($apply_pending_changes) {
            vmconfig_apply_pending($vmid, $conf, $storecfg);
        }
    };
    warn $@ if $@; # avoid errors - just warn
}
5714
# call only in locked context
#
# Stops (or, with $shutdown, shuts down) a running VM and cleans up afterwards.
#
# The guest is first asked to stop via the monitor. If that fails or the
# timeout (default 60s, or the 'down' value of the startup option on shutdown)
# expires, the process gets a SIGTERM when $force is set - otherwise this dies.
# If it is still running 10 seconds after SIGTERM it gets a SIGKILL.
# Returns immediately if the VM is not running.
sub _do_vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive) = @_;

    my $pid = check_running($vmid, $nocheck);
    return if !$pid;

    my $conf;
    if (!$nocheck) {
        $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->check_lock($conf) if !$skiplock;
        if (!defined($timeout) && $shutdown && $conf->{startup}) {
            my $opts = PVE::JSONSchema::pve_parse_startup_order($conf->{startup});
            $timeout = $opts->{down} if $opts->{down};
        }
        PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-stop');
    }

    # polls once per second; returns true if the VM is still running after
    # $limit polls
    my $timed_out_waiting = sub {
        my ($limit) = @_;

        my $elapsed = 0;
        while (($elapsed < $limit) && check_running($vmid, $nocheck)) {
            $elapsed++;
            sleep 1;
        }
        return $elapsed >= $limit;
    };

    eval {
        if (!$shutdown) {
            mon_cmd($vmid, "quit");
        } elsif (defined($conf) && get_qga_key($conf, 'enabled')) {
            mon_cmd($vmid, "guest-shutdown", timeout => $timeout);
        } else {
            mon_cmd($vmid, "system_powerdown");
        }
    };
    my $err = $@;

    if ($err) {
        if (!check_running($vmid, $nocheck)) {
            warn "Unexpected: VM shutdown command failed, but VM not running anymore..\n";
            return;
        }

        die "VM quit/powerdown failed\n" if !$force;

        warn "VM quit/powerdown failed - terminating now with SIGTERM\n";
        kill 15, $pid;
    } else {
        $timeout //= 60;

        if (!$timed_out_waiting->($timeout)) {
            vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
            return;
        }

        die "VM quit/powerdown failed - got timeout\n" if !$force;

        warn "VM still running - terminating now with SIGTERM\n";
        kill 15, $pid;
    }

    # wait again, this time for the SIGTERM to take effect
    $timeout = 10;

    if ($timed_out_waiting->($timeout)) {
        warn "VM still running - terminating now with SIGKILL\n";
        kill 9, $pid;
        sleep 1;
    }

    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
}
5796
# Stop a VM while holding its config lock.
#
# Note: use $nocheck to skip tests whether the VM configuration file exists.
# We need that when migrating VMs to other nodes (files already moved).
# Note: $keepActive is set in vzdump stop mode - volumes need to stay active.
#
# With $migratedfrom set, the process is simply sent a SIGTERM and cleaned up
# using the config of the source node, without taking the lock.
sub vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive, $migratedfrom) = @_;

    # a plain stop defaults to forceful termination
    if (!$shutdown) {
        $force //= 1;
    }

    if ($migratedfrom) {
        my $pid = check_running($vmid, $nocheck, $migratedfrom);
        if ($pid) {
            kill 15, $pid;
        }
        my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);
        vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 0);
        return;
    }

    my $stop_locked = sub {
        _do_vm_stop($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive);
    };
    PVE::QemuConfig->lock_config($vmid, $stop_locked);
}
5817
# Reboot a running VM: create a reboot request and shut the VM down (no
# forced termination) while holding the config lock. Does nothing if the VM
# is not running. On error the reboot request is removed again and the error
# is re-thrown.
sub vm_reboot {
    my ($vmid, $timeout) = @_;

    my $reboot_locked = sub {
        eval {
            # only reboot if running, as qmeventd starts it again on a stop event
            if (check_running($vmid)) {
                create_reboot_request($vmid);

                my $storecfg = PVE::Storage::config();
                _do_vm_stop($storecfg, $vmid, undef, undef, $timeout, 1);
            }
        };
        if (my $err = $@) {
            # avoid that the next normal shutdown will be confused for a reboot
            clear_reboot_request($vmid);
            die $err;
        }
    };

    PVE::QemuConfig->lock_config($vmid, $reboot_locked);
}
5840
# Suspend a VM.
#
# Without $includestate the guest is only paused via the 'stop' monitor
# command. With $includestate the VM state is saved to a state volume, the VM
# is quit and the config lock is set to 'suspended' ('suspending' while the
# state is being written).
#
# note: if using the statestorage parameter, the caller has to check privileges
sub vm_suspend {
    my ($vmid, $skiplock, $includestate, $statestorage) = @_;

    # shared between the locked sections below
    my ($conf, $path, $storecfg, $vmstate);

    my $prepare_locked = sub {
        $conf = PVE::QemuConfig->load_config($vmid);

        my $is_backing_up = PVE::QemuConfig->has_lock($conf, 'backup');
        PVE::QemuConfig->check_lock($conf)
            if !$skiplock && !$is_backing_up;

        die "cannot suspend to disk during backup\n"
            if $is_backing_up && $includestate;

        if (!$includestate) {
            mon_cmd($vmid, "stop");
            return;
        }

        $conf->{lock} = 'suspending';
        my $date = strftime("%Y-%m-%d", localtime(time()));
        $storecfg = PVE::Storage::config();

        if (!$statestorage) {
            $statestorage = find_vmstate_storage($conf, $storecfg);

            # check permissions for the storage
            my $rpcenv = PVE::RPCEnvironment::get();
            if ($rpcenv->{type} ne 'cli') {
                my $authuser = $rpcenv->get_user();
                $rpcenv->check($authuser, "/storage/$statestorage", ['Datastore.AllocateSpace']);
            }
        }

        $vmstate = PVE::QemuConfig->__snapshot_save_vmstate(
            $vmid, $conf, "suspend-$date", $storecfg, $statestorage, 1);
        $path = PVE::Storage::path($storecfg, $vmstate);
        PVE::QemuConfig->write_config($vmid, $conf);
    };
    PVE::QemuConfig->lock_config($vmid, $prepare_locked);

    if ($includestate) {
        # save vm state (done without holding the config lock)
        PVE::Storage::activate_volumes($storecfg, [$vmstate]);

        eval {
            set_migration_caps($vmid, 1);
            mon_cmd($vmid, "savevm-start", statefile => $path);

            # poll until saving has completed or failed
            while (1) {
                my $state = mon_cmd($vmid, "query-savevm");
                my $status = $state->{status};

                die "savevm not active\n" if !$status;

                if ($status eq 'active') {
                    sleep(1);
                    next;
                }

                if ($status eq 'completed') {
                    print "State saved, quitting\n";
                    last;
                }

                die "query-savevm failed with error '$state->{error}'\n"
                    if $status eq 'failed' && $state->{error};

                die "query-savevm returned status '$state->{status}'\n";
            }
        };
        my $err = $@;

        my $finish_locked = sub {
            $conf = PVE::QemuConfig->load_config($vmid);

            if ($err) {
                # cleanup, but leave suspending lock, to indicate something went wrong
                eval {
                    mon_cmd($vmid, "savevm-end");
                    PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
                    PVE::Storage::vdisk_free($storecfg, $vmstate);
                    delete $conf->@{qw(vmstate runningmachine runningcpu)};
                    PVE::QemuConfig->write_config($vmid, $conf);
                };
                warn $@ if $@;
                die $err;
            }

            die "lock changed unexpectedly\n"
                if !PVE::QemuConfig->has_lock($conf, 'suspending');

            mon_cmd($vmid, "quit");
            $conf->{lock} = 'suspended';
            PVE::QemuConfig->write_config($vmid, $conf);
        };
        PVE::QemuConfig->lock_config($vmid, $finish_locked);
    }
}
5935
# Resume a VM while holding its config lock.
#
# Depending on the status reported by 'query-status' this is a no-op
# ('running'), a 'system_wakeup' ('suspended') or a 'cont' (anything else). A
# VM in status 'shutdown' is reset before being continued.
# Unless $nocheck is set, the config lock is checked - a 'backup' lock or
# $skiplock bypasses that check.
sub vm_resume {
    my ($vmid, $skiplock, $nocheck) = @_;

    my $resume_locked = sub {
        my $res = mon_cmd($vmid, 'query-status');
        my $status = $res->{status};

        # job done, go home
        return if $status && $status eq 'running';

        my $resume_cmd = ($status && $status eq 'suspended') ? 'system_wakeup' : 'cont';
        my $reset = ($status && $status eq 'shutdown') ? 1 : 0;

        if (!$nocheck) {
            my $conf = PVE::QemuConfig->load_config($vmid);

            PVE::QemuConfig->check_lock($conf)
                if !$skiplock && !PVE::QemuConfig->has_lock($conf, 'backup');
        }

        # required if a VM shuts down during a backup and we get a resume
        # request before the backup finishes for example
        mon_cmd($vmid, "system_reset") if $reset;

        mon_cmd($vmid, $resume_cmd);
    };

    PVE::QemuConfig->lock_config($vmid, $resume_locked);
}
5966
# Send a key event to a VM while holding its config lock.
#
# $vmid:     ID of the target VM
# $skiplock: accepted for interface compatibility, not used here
# $key:      key name, passed verbatim to the monitor 'sendkey' command
#
# Dies with the monitor output if the command printed anything.
sub vm_sendkey {
    my ($vmid, $skiplock, $key) = @_;

    my $send = sub {
        # the loaded config is not used any further in this function
        my $conf = PVE::QemuConfig->load_config($vmid);

        # there is no qmp command, so we use the human monitor command
        my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "sendkey $key");
        die $output if $output ne '';
    };

    PVE::QemuConfig->lock_config($vmid, $send);
}
5979
5980 # vzdump restore implementation
5981
# Return the name of the first member listed by 'tar tf' for an archive.
#
# $archive: path of the archive, must be a regular file
#
# Dies if the file does not exist, if 'tar' cannot be started, or if the
# listing yields no (or a false) first line.
sub tar_archive_read_firstfile {
    my $archive = shift;

    die "ERROR: file '$archive' does not exist\n" if ! -f $archive;

    # try to detect archive type first
    my $pid = open(my $fh, '-|', 'tar', 'tf', $archive)
        or die "unable to open file '$archive'\n";
    my $firstfile = <$fh>;

    # only the first line is needed, so stop the listing right away
    kill 15, $pid;
    close $fh;

    die "ERROR: archive contains no data\n" if !$firstfile;
    chomp $firstfile;

    return $firstfile;
}
5999
# Remove the temporary volumes recorded in a restore stat file.
#
# $storecfg: Storage configuration, used to free non-path volumes
# $statfile: file with lines ending in 'vzdump:<name>:<volid>'
#
# Absolute paths are unlinked, everything else is freed via the storage
# layer. Failures are only reported on STDERR; a missing or unreadable stat
# file is silently ignored.
sub tar_restore_cleanup {
    my ($storecfg, $statfile) = @_;

    print STDERR "starting cleanup\n";

    my $fd = IO::File->new($statfile, "r") or return;

    while (defined(my $line = <$fd>)) {
        if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
            my $volid = $2;
            eval {
                if ($volid =~ m|^/|) {
                    # 'or' (not '||') so that the check applies to the
                    # result of unlink and not to its argument
                    unlink($volid) or die "unlink failed - $!\n";
                } else {
                    PVE::Storage::vdisk_free($storecfg, $volid);
                }
                print STDERR "temporary volume '$volid' successfully removed\n";
            };
            print STDERR "unable to cleanup '$volid' - $@" if $@;
        } else {
            print STDERR "unable to parse line in statfile - $line";
        }
    }
    $fd->close();
}
6025
# Restore a VM from a file based backup archive.
#
# $archive: archive path, or '-' to read a VMA stream from STDIN
# $vmid:    ID of the VM to restore to
# $user:    user the restore is done for
# $opts:    restore options; 'format' overrides the detected archive format
#
# Dispatches to restore_tar_archive() for 'tar' archives and to
# restore_vma_archive() for everything else.
sub restore_file_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    # nothing can be detected for a stream, it is always handled as VMA
    if ($archive eq '-') {
        return restore_vma_archive($archive, $vmid, $user, $opts);
    }

    # try to detect archive format
    my $detected = PVE::Storage::archive_info($archive);
    my $format = $opts->{format} // $detected->{format};
    my $comp = $detected->{compression};

    return $format eq 'tar'
        ? restore_tar_archive($archive, $vmid, $user, $opts)
        : restore_vma_archive($archive, $vmid, $user, $opts, $comp);
}
6043
# Helper to remove disks that will not be used after restore
#
# $storecfg: Storage configuration
# $vmid: ID of the VM that gets restored
# $oldconf: the VM config as it was before the restore
# $virtdev_hash: drives that get restored, keyed by config drive name
#
# Removal is best-effort: errors are turned into warnings.
my $restore_cleanup_oldconf = sub {
    my ($storecfg, $vmid, $oldconf, $virtdev_hash) = @_;

    my $free_volume = sub {
        my ($volid) = @_;
        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        warn $@ if $@;
    };

    PVE::QemuConfig->foreach_volume($oldconf, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive, 1);

        # only volumes managed by a storage are candidates, no absolute paths
        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;

        # never touch a volume that is not owned by this VM
        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !($path && $owner && $owner == $vmid);

        # Note: only delete disk we want to restore
        # other volumes will become unused
        $free_volume->($volid) if $virtdev_hash->{$ds};
    });

    # delete vmstate files, after the restore we have no snapshots anymore
    for my $snapname (keys %{$oldconf->{snapshots}}) {
        my $snap = $oldconf->{snapshots}->{$snapname};
        $free_volume->($snap->{vmstate}) if $snap->{vmstate};
    }
};
6080
# Helper to parse vzdump backup device hints
#
# $rpcenv: Environment, used to check storage permissions
# $user: User ID, to check storage permissions
# $storecfg: Storage configuration
# $fh: the file handle for reading the configuration
# $devinfo: should contain device sizes for all backed-up devices
# $options: backup options (pool, default storage)
#
# Return: $virtdev_hash, updates $devinfo (add devname, virtdev, format, storeid)
my $parse_backup_hints = sub {
    my ($rpcenv, $user, $storecfg, $fh, $devinfo, $options) = @_;

    my $virtdev_hash = {};

    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\#qmdump\#map:(\S+):(\S+):(\S*):(\S*):$/) {
            my ($virtdev, $devname, $storeid, $format) = ($1, $2, $3, $4);

            my $info = $devinfo->{$devname};
            die "archive does not contain data for drive '$virtdev'\n" if !$info;

            # an explicitly requested storage wins over the one from the hint,
            # 'local' is the fallback if neither yields a usable value
            $storeid = $options->{storage} if defined($options->{storage});
            $storeid ||= 'local';
            $format ||= 'raw';

            @{$info}{qw(devname virtdev format storeid)} =
                ($devname, $virtdev, $format, $storeid);

            # check permission on storage
            my $pool = $options->{pool}; # todo: do we need that?
            $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace'])
                if $user ne 'root@pam';

            $virtdev_hash->{$virtdev} = $info;
        } elsif ($line =~ m/^((?:ide|sata|scsi)\d+):\s*(.*)\s*$/) {
            my ($virtdev, $drive_str) = ($1, $2);

            # of the plain drive lines only cloudinit drives are of interest,
            # they are not part of the archive and get a fresh volume
            my $drive = parse_drive($virtdev, $drive_str);
            next if !drive_is_cloudinit($drive);

            my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
            $storeid = $options->{storage} if defined ($options->{storage});
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

            $virtdev_hash->{$virtdev} = {
                format => qemu_img_format($scfg, $volname), # has 'raw' fallback
                storeid => $storeid,
                size => PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE,
                is_cloudinit => 1,
            };
        }
    }

    return $virtdev_hash;
};
6141
# Helper to allocate and activate all volumes required for a restore
#
# $storecfg: Storage configuration
# $virtdev_hash: as returned by parse_backup_hints()
# $vmid: ID of the VM the volumes are allocated for
#
# Each entry of $virtdev_hash is updated in place: 'format' falls back to the
# storage default if the requested one is not supported, and 'volid' is set
# to the newly allocated volume.
#
# Returns: { $virtdev => $volid }
my $restore_allocate_devices = sub {
    my ($storecfg, $virtdev_hash, $vmid) = @_;

    my $map = {};
    foreach my $virtdev (sort keys %$virtdev_hash) {
        my $d = $virtdev_hash->{$virtdev};

        # size in $d is divided by 1024 and rounded up for the allocation
        my $alloc_size = int(($d->{size} + 1024 - 1)/1024);
        my $storeid = $d->{storeid};

        # looked up once per device and reused for the cloudinit naming below
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        # test if requested format is supported
        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        my $supported = grep { $_ eq $d->{format} } @$validFormats;
        $d->{format} = $defFormat if !$supported;

        # only cloudinit volumes get an explicit name, for all other volumes
        # the name is left undefined
        my $name;
        if ($d->{is_cloudinit}) {
            $name = "vm-$vmid-cloudinit";
            if ($scfg->{path}) {
                $name .= ".$d->{format}";
            }
        }

        my $volid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $vmid, $d->{format}, $name, $alloc_size);

        print STDERR "new volume ID is '$volid'\n";
        $d->{volid} = $volid;

        PVE::Storage::activate_volumes($storecfg, [$volid]);

        $map->{$virtdev} = $volid;
    }

    return $map;
};
6185
# Translate one line of a backed-up VM config into the restored config.
#
# $cookie: state shared between calls; 'netcount' is the index used for the
#          next net device created from an old 'vlanN' line
# $map:    { $virtdev => $volid } of the newly allocated volumes
# $line:   the config line as read from the backup
# $unique: if true, MAC addresses and the smbios1 UUID get regenerated
#
# Returns the text to append to the new config, which may be empty or span
# several lines.
sub restore_update_config_line {
    my ($cookie, $map, $line, $unique) = @_;

    # lines that must never end up in the restored config
    my @dropped = (
        qr/^\#qmdump\#/,
        qr/^\#vzdump\#/,
        qr/^lock:/,
        qr/^unused\d+:/,
        qr/^parent:/,
    );
    for my $pattern (@dropped) {
        return '' if $line =~ $pattern;
    }

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
    my $new_mac = sub { PVE::Tools::random_ether_addr($dc->{mac_prefix}) };

    my $out = '';

    if (($line =~ m/^(vlan(\d+)):\s*(\S+)\s*$/)) {
        # try to convert old 1.X settings
        my ($bridge_index, $ethcfg) = ($2, $3);
        for my $devconfig (PVE::Tools::split_list($ethcfg)) {
            my ($model, $macaddr) = split(/\=/, $devconfig);
            $macaddr = $new_mac->() if !$macaddr || $unique;

            my $netstr = print_net({
                model => $model,
                bridge => "vmbr$bridge_index",
                macaddr => $macaddr,
            });

            $out .= "net$cookie->{netcount}: $netstr\n";
            $cookie->{netcount}++;
        }
    } elsif (($line =~ m/^(net\d+):\s*(\S+)\s*$/) && $unique) {
        my ($id, $netstr) = ($1, $2);
        my $net = parse_net($netstr);
        # only an existing MAC address gets replaced
        $net->{macaddr} = $new_mac->() if $net->{macaddr};
        $out .= "$id: " . print_net($net) . "\n";
    } elsif ($line =~ m/^((ide|scsi|virtio|sata|efidisk|tpmstate)\d+):\s*(\S+)\s*$/) {
        my ($virtdev, $value) = ($1, $3);
        my $di = parse_drive($virtdev, $value);
        if (defined($di->{backup}) && !$di->{backup}) {
            # drive was explicitly excluded from backup, keep it commented out
            $out .= "#$line";
        } elsif ($map->{$virtdev}) {
            delete $di->{format}; # format can change on restore
            $di->{file} = $map->{$virtdev};
            $out .= "$virtdev: " . print_drive($di) . "\n";
        } else {
            $out .= $line;
        }
    } elsif (($line =~ m/^vmgenid: (.*)/)) {
        my $vmgenid = $1;
        # always generate a new vmgenid if there was a valid one setup
        $vmgenid = generate_uuid() if $vmgenid ne '0';
        $out .= "vmgenid: $vmgenid\n";
    } elsif (($line =~ m/^(smbios1: )(.*)/) && $unique) {
        my ($prefix, $value) = ($1, $2);

        my ($uuid, $uuid_str);
        UUID::generate($uuid);
        UUID::unparse($uuid, $uuid_str);

        my $smbios1 = parse_smbios1($value);
        $smbios1->{uuid} = $uuid_str;
        $out .= $prefix . print_smbios1($smbios1) . "\n";
    } else {
        $out .= $line;
    }

    return $out;
}
6254
# Deactivate all volumes recorded in $devinfo.
#
# $storecfg: Storage configuration
# $devinfo: per-device info hash; entries without a 'volid' are ignored
my $restore_deactivate_volumes = sub {
    my ($storecfg, $devinfo) = @_;

    my @volids = grep { $_ } map { $devinfo->{$_}->{volid} } keys %$devinfo;

    PVE::Storage::deactivate_volumes($storecfg, \@volids);
};
6266
# Remove all volumes recorded in $devinfo after a failed restore.
#
# $storecfg: Storage configuration, used to free non-path volumes
# $devinfo: per-device info hash; entries without a 'volid' are skipped
#
# Absolute paths are unlinked, everything else is freed via the storage
# layer. Failures are only reported on STDERR, so that the remaining volumes
# still get cleaned up.
my $restore_destroy_volumes = sub {
    my ($storecfg, $devinfo) = @_;

    foreach my $devname (keys %$devinfo) {
        my $volid = $devinfo->{$devname}->{volid};
        next if !$volid;
        eval {
            if ($volid =~ m|^/|) {
                # 'or' (not '||') so that the check applies to the result of
                # unlink and not to its argument
                unlink($volid) or die "unlink failed - $!\n";
            } else {
                PVE::Storage::vdisk_free($storecfg, $volid);
            }
            print STDERR "temporary volume '$volid' successfully removed\n";
        };
        print STDERR "unable to cleanup '$volid' - $@" if $@;
    }
};
6284
# Collect the 'images' volumes found on the storages.
#
# $cfg:  Storage configuration
# $vmid: passed on to PVE::Storage::vdisk_list() as VM ID
#
# Returns: { $volid => $item }, where $item is the list entry extended by the
# 'path' of the volume. Entries without a volid or size are left out.
sub scan_volids {
    my ($cfg, $vmid) = @_;

    my $listing = PVE::Storage::vdisk_list($cfg, undef, $vmid, undef, 'images');

    my $volid_hash = {};
    for my $storeid (keys %$listing) {
        for my $vol (@{$listing->{$storeid}}) {
            next unless $vol->{volid} && $vol->{size};

            $vol->{path} = PVE::Storage::path($cfg, $vol->{volid});
            $volid_hash->{$vol->{volid}} = $vol;
        }
    }

    return $volid_hash;
}
6301
# Bring the disk entries of a VM config in line with the volumes that exist.
#
# $vmid:       ID of the VM, used for messages and to skip its state volumes
# $conf:       VM config, modified in place
# $volid_hash: { $volid => $info } of existing volumes, see scan_volids()
#
# Three passes are made: disk sizes get updated, 'unusedX' entries pointing
# to a volume that is in use get dropped, and volumes not referenced at all
# get added as unused.
#
# Returns a true value if $conf was changed, undef otherwise.
sub update_disk_config {
    my ($vmid, $conf, $volid_hash) = @_;

    my $changes;
    my $prefix = "VM $vmid";

    # Note: it is allowed to define multiple storages with same path (alias), so
    # we need to check both 'volid' and real 'path' (two different volid can point
    # to the same path).
    my $seen_volid = {}; # used and unused disks
    my $seen_path = {};

    # update size info
    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;
        my $volume = $volid_hash->{$volid};

        # mark volid as "in-use" for next step
        $seen_volid->{$volid} = 1;
        if ($volume) {
            my $path = $volume->{path};
            $seen_path->{$path} = 1 if $path;
        }

        return if drive_is_cdrom($drive);
        return if !$volume;

        my ($updated, $msg) = PVE::QemuServer::Drive::update_disksize($drive, $volume->{size});
        return if !defined($updated);

        $changes = 1;
        $conf->{$opt} = print_drive($updated);
        print "$prefix ($opt): $msg\n";
    });

    # remove 'unusedX' entry if volume is used
    PVE::QemuConfig->foreach_unused_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $known = $volid_hash->{$volid};
        my $path = $known ? $known->{path} : undef;

        if ($seen_volid->{$volid} || ($path && $seen_path->{$path})) {
            print "$prefix remove entry '$opt', its volume '$volid' is in use\n";
            $changes = 1;
            delete $conf->{$opt};
        }

        # a later duplicate of this unused entry gets removed as well
        $seen_volid->{$volid} = 1;
        $seen_path->{$path} = 1 if $path;
    });

    for my $volid (sort keys %$volid_hash) {
        next if $volid =~ m/vm-$vmid-state-/;
        next if $seen_volid->{$volid};

        my $path = $volid_hash->{$volid}->{path};
        next if !$path; # just to be sure
        next if $seen_path->{$path};

        $changes = 1;
        my $key = PVE::QemuConfig->add_unused_volume($conf, $volid);
        print "$prefix add unreferenced volume '$volid' as '$key' to config\n";
        $seen_path->{$path} = 1; # avoid to add more than once (aliases)
    }

    return $changes;
}
6375
# Rescan the storages and update the disk entries of one or all VM configs.
#
# $vmid:   ID of the VM to update; all VMs from config_list() if undefined
# $nolock: if true, the config is updated without taking the config lock
# $dryrun: if true, changes are reported but the config is not written
sub rescan {
    my ($vmid, $nolock, $dryrun) = @_;

    my $cfg = PVE::Storage::config();

    print "rescan volumes...\n";
    my $volid_hash = scan_volids($cfg, $vmid);

    my $updatefn = sub {
        my ($id) = @_;

        my $conf = PVE::QemuConfig->load_config($id);

        PVE::QemuConfig->check_lock($conf);

        # only pass on the volumes that belong to this VM
        my $vm_volids = {};
        for my $volid (keys %$volid_hash) {
            my $info = $volid_hash->{$volid};
            next unless $info->{vmid} && $info->{vmid} == $id;
            $vm_volids->{$volid} = $info;
        }

        my $changes = update_disk_config($id, $conf, $vm_volids);

        PVE::QemuConfig->write_config($id, $conf) if $changes && !$dryrun;
    };

    my @targets = defined($vmid) ? ($vmid) : keys %{config_list()};

    for my $id (@targets) {
        if ($nolock) {
            $updatefn->($id);
        } else {
            PVE::QemuConfig->lock_config($id, $updatefn, $id);
        }
    }
}
6419
# Restore a VM from a Proxmox Backup Server snapshot.
#
# $archive: volume ID of the backup, must parse as vtype 'backup' with
#           format 'pbs-vm'
# $vmid:    ID of the VM to restore to
# $user:    user the restore is done for, used for storage permission checks
# $options: restore options; 'storage', 'pool', 'unique' and 'live' are used
#
# The index, the VM config and (if present) the firewall config are fetched
# into a temporary directory, the target volumes get allocated and filled
# with 'pbs-restore', and finally the new VM config is written. With 'live'
# only 'efidisk0' and 'tpmstate0' are restored up front, the other disks are
# handled by pbs_live_restore() while the 'create' lock is kept.
#
# On error the volumes recorded in $devinfo are destroyed and the error is
# re-thrown.
sub restore_proxmox_backup_archive {
    my ($archive, $vmid, $user, $options) = @_;

    my $storecfg = PVE::Storage::config();

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($archive);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

    my $fingerprint = $scfg->{fingerprint};
    my $keyfile = PVE::Storage::PBSPlugin::pbs_encryption_key_file_name($storecfg, $storeid);

    my $repo = PVE::PBSClient::get_repository($scfg);

    # This is only used for `pbs-restore` and the QEMU PBS driver (live-restore)
    my $password = PVE::Storage::PBSPlugin::pbs_get_password($scfg, $storeid);
    local $ENV{PBS_PASSWORD} = $password;
    local $ENV{PBS_FINGERPRINT} = $fingerprint if defined($fingerprint);

    my ($vtype, $pbs_backup_name, undef, undef, undef, undef, $format) =
        PVE::Storage::parse_volname($storecfg, $archive);

    die "got unexpected vtype '$vtype'\n" if $vtype ne 'backup';

    die "got unexpected backup format '$format'\n" if $format ne 'pbs-vm';

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;
    mkpath $tmpdir;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    # Note: $oldconf is undef if VM does not exists
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my $rpcenv = PVE::RPCEnvironment::get();
    my $devinfo = {};

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        my $cfgfn = "$tmpdir/qemu-server.conf";
        my $firewall_config_fn = "$tmpdir/fw.conf";
        my $index_fn = "$tmpdir/index.json";

        my $cmd = "restore";

        my $param = [$pbs_backup_name, "index.json", $index_fn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);
        my $index = PVE::Tools::file_get_contents($index_fn);
        $index = decode_json($index);

        # collect the size of every disk image listed in the index
        foreach my $info (@{$index->{files}}) {
            # the dots are literal parts of the file name, so escape them
            if ($info->{filename} =~ m/^(drive-\S+)\.img\.fidx$/) {
                my $devname = $1;
                if ($info->{size} =~ m/^(\d+)$/) { # untaint size
                    $devinfo->{$devname}->{size} = $1;
                } else {
                    die "unable to parse file size in 'index.json' - got '$info->{size}'\n";
                }
            }
        }

        my $is_qemu_server_backup = scalar(
            grep { $_->{filename} eq 'qemu-server.conf.blob' } @{$index->{files}}
        );
        if (!$is_qemu_server_backup) {
            die "backup does not look like a qemu-server backup (missing 'qemu-server.conf' file)\n";
        }
        my $has_firewall_config = scalar(grep { $_->{filename} eq 'fw.conf.blob' } @{$index->{files}});

        $param = [$pbs_backup_name, "qemu-server.conf", $cfgfn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

        if ($has_firewall_config) {
            $param = [$pbs_backup_name, "fw.conf", $firewall_config_fn];
            PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($firewall_config_fn, "${pve_firewall_dir}/$vmid.fw");
        }

        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        my $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $storecfg, $fh, $devinfo, $options);

        # fixme: rate limit?

        # create empty/temp config
        PVE::Tools::file_set_contents($conffile, "memory: 128\nlock: create");

        $restore_cleanup_oldconf->($storecfg, $vmid, $oldconf, $virtdev_hash) if $oldconf;

        # allocate volumes
        my $map = $restore_allocate_devices->($storecfg, $virtdev_hash, $vmid);

        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            # this fails if storage is unavailable
            my $volid = $d->{volid};
            my $path = PVE::Storage::path($storecfg, $volid);

            # for live-restore we only want to preload the efidisk and TPM state
            next if $options->{live} && $virtdev ne 'efidisk0' && $virtdev ne 'tpmstate0';

            my $pbs_restore_cmd = [
                '/usr/bin/pbs-restore',
                '--repository', $repo,
                $pbs_backup_name,
                "$d->{devname}.img.fidx",
                $path,
                '--verbose',
            ];

            push @$pbs_restore_cmd, '--format', $d->{format} if $d->{format};
            push @$pbs_restore_cmd, '--keyfile', $keyfile if -e $keyfile;

            if (PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $volid)) {
                push @$pbs_restore_cmd, '--skip-zero';
            }

            my $dbg_cmdstring = PVE::Tools::cmd2string($pbs_restore_cmd);
            print "restore proxmox backup image: $dbg_cmdstring\n";
            run_command($pbs_restore_cmd);
        }

        # second pass over the config: build the new config text
        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $options->{unique},
            );
        }

        $fh->close();
    };
    my $err = $@;

    # for a successful live-restore the volumes have to stay active
    if ($err || !$options->{live}) {
        $restore_deactivate_volumes->($storecfg, $devinfo);
    }

    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($storecfg, $devinfo);
        die $err;
    }

    if ($options->{live}) {
        # keep lock during live-restore
        $new_conf_raw .= "\nlock: create";
    }

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $options->{pool}) if $options->{pool};

    if ($options->{live}) {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "got signal ($!) - abort\n"; };

        my $conf = PVE::QemuConfig->load_config($vmid);
        die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf);

        # these special drives are already restored before start
        delete $devinfo->{'drive-efidisk0'};
        delete $devinfo->{'drive-tpmstate0-backup'};
        pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $repo, $keyfile, $pbs_backup_name);

        PVE::QemuConfig->remove_lock($vmid, "create");
    }
}
6622
# Start a VM on top of a PBS snapshot and stream the data to its disks.
#
# $vmid:           ID of the VM
# $conf:           VM config
# $storecfg:       Storage configuration
# $restored_disks: hash keyed by device name ('drive-<confname>'), one entry
#                  per disk that gets restored while the VM is running
# $repo:           PBS repository
# $keyfile:        path of the encryption key file, only used if it exists
# $snap:           name of the PBS snapshot to restore from
#
# If anything fails after the setup, the VM is stopped and the function dies
# with "live-restore failed".
sub pbs_live_restore {
    my ($vmid, $conf, $storecfg, $restored_disks, $repo, $keyfile, $snap) = @_;

    print "starting VM for live-restore\n";
    print "repository: '$repo', snapshot: '$snap'\n";

    my $pbs_backing = {};
    for my $ds (keys %$restored_disks) {
        # never continue with a stale capture if the name does not match
        my ($confname) = $ds =~ m/^drive-(.*)$/
            or die "unexpected device name '$ds' for live-restore\n";

        $pbs_backing->{$confname} = {
            repository => $repo,
            snapshot => $snap,
            archive => "$ds.img.fidx",
        };
        $pbs_backing->{$confname}->{keyfile} = $keyfile if -e $keyfile;

        my $drive = parse_drive($confname, $conf->{$confname});
        print "restoring '$ds' to '$drive->{file}'\n";
    }

    eval {
        # make sure HA doesn't interrupt our restore by stopping the VM
        if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
            run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
        }

        # start VM with backing chain pointing to PBS backup, environment vars for PBS driver
        # in QEMU (PBS_PASSWORD and PBS_FINGERPRINT) are already set by our caller
        vm_start_nolock($storecfg, $vmid, $conf, {paused => 1, 'pbs-backing' => $pbs_backing}, {});

        my $qmeventd_fd = register_qmeventd_handle($vmid);

        # begin streaming, i.e. data copy from PBS to target disk for every vol,
        # this will effectively collapse the backing image chain consisting of
        # [target <- alloc-track -> PBS snapshot] to just [target] (alloc-track
        # removes itself once all backing images vanish with 'auto-remove=on')
        my $jobs = {};
        for my $ds (sort keys %$restored_disks) {
            my $job_id = "restore-$ds";
            mon_cmd($vmid, 'block-stream',
                'job-id' => $job_id,
                device => "$ds",
            );
            $jobs->{$job_id} = {};
        }

        # the VM was started paused, let the guest run while the jobs stream
        mon_cmd($vmid, 'cont');
        qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');

        print "restore-drive jobs finished successfully, removing all tracking block devices"
            ." to disconnect from Proxmox Backup Server\n";

        for my $ds (sort keys %$restored_disks) {
            mon_cmd($vmid, 'blockdev-del', 'node-name' => "$ds-pbs");
        }

        close($qmeventd_fd);
    };

    my $err = $@;

    if ($err) {
        warn "An error occurred during live-restore: $err\n";
        _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
        die "live-restore failed\n";
    }
}
6692
# Restore a VM from a VMA archive.
#
# $archive: path of the archive, or '-' to read it from STDIN
# $vmid:    ID of the VM to restore to
# $user:    user the restore is done for, used for storage permission checks
# $opts:    restore options; 'bwlimit', 'storage', 'pool' and 'unique' are used
# $comp:    compression of the archive, if any
#
# A command pipeline of an optional 'cstream' rate limiter, an optional
# decompressor and 'vma extract' is run. Once 'vma' has printed its device
# list and the 'CTIME:' line, the target volumes get allocated and the
# device mapping is written to the fifo that 'vma' reads it from.
#
# On error the volumes recorded in $devinfo are destroyed and the error is
# re-thrown.
sub restore_vma_archive {
    my ($archive, $vmid, $user, $opts, $comp) = @_;

    my $readfrom = $archive;

    my $cfg = PVE::Storage::config();
    my $commands = [];
    my $bwlimit = $opts->{bwlimit};

    my $dbg_cmdstring = '';
    # append a command to the pipeline, every later stage reads from STDIN
    my $add_pipe = sub {
        my ($cmd) = @_;
        push @$commands, $cmd;
        $dbg_cmdstring .= ' | ' if length($dbg_cmdstring);
        $dbg_cmdstring .= PVE::Tools::cmd2string($cmd);
        $readfrom = '-';
    };

    my $input = undef;
    if ($archive eq '-') {
        $input = '<&STDIN';
    } else {
        # If we use a backup from a PVE defined storage we also consider that
        # storage's rate limit:
        my (undef, $volid) = PVE::Storage::path_to_volume_id($cfg, $archive);
        if (defined($volid)) {
            my ($sid, undef) = PVE::Storage::parse_volume_id($volid);
            my $readlimit = PVE::Storage::get_bandwidth_limit('restore', [$sid], $bwlimit);
            if ($readlimit) {
                print STDERR "applying read rate limit: $readlimit\n";
                my $cstream = ['cstream', '-t', $readlimit*1024, '--', $readfrom];
                $add_pipe->($cstream);
            }
        }
    }

    if ($comp) {
        my $info = PVE::Storage::decompressor_info('vma', $comp);
        # build a new command list instead of appending to the array that
        # was handed out by decompressor_info()
        my $cmd = [ @{$info->{decompressor}}, $readfrom ];
        $add_pipe->($cmd);
    }

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { warn "got interrupt - ignored\n"; };

    my $mapfifo = "/var/tmp/vzdumptmp$$.fifo";
    # fail early if the fifo is missing, an already existing one is reused
    if (!-p $mapfifo) {
        POSIX::mkfifo($mapfifo, 0600)
            or die "unable to create fifo '$mapfifo' - $!\n";
    }
    my $fifofh;
    my $openfifo = sub { open($fifofh, '>', $mapfifo) or die $! };

    $add_pipe->(['vma', 'extract', '-v', '-r', $mapfifo, $readfrom, $tmpdir]);

    my $oldtimeout;
    my $timeout = 5;

    my $devinfo = {};

    my $rpcenv = PVE::RPCEnvironment::get();

    my $conffile = PVE::QemuConfig->config_file($vmid);

    # Note: $oldconf is undef if VM does not exist
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my %storage_limits;

    # allocate the volumes, write the device map to the fifo and build the
    # new config; called once the config was extracted to $tmpdir
    my $print_devmap = sub {
        my $cfgfn = "$tmpdir/qemu-server.conf";

        # we can read the config - that is already extracted
        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        my $fwcfgfn = "$tmpdir/qemu-server.fw";
        if (-f $fwcfgfn) {
            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($fwcfgfn, "${pve_firewall_dir}/$vmid.fw");
        }

        my $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $cfg, $fh, $devinfo, $opts);

        # determine the write rate limit once per target storage
        foreach my $info (values %{$virtdev_hash}) {
            my $storeid = $info->{storeid};
            next if defined($storage_limits{$storeid});

            my $limit = PVE::Storage::get_bandwidth_limit('restore', [$storeid], $bwlimit) // 0;
            print STDERR "rate limit for storage $storeid: $limit KiB/s\n" if $limit;
            $storage_limits{$storeid} = $limit * 1024;
        }

        foreach my $devname (keys %$devinfo) {
            die "found no device mapping information for device '$devname'\n"
                if !$devinfo->{$devname}->{virtdev};
        }

        # create empty/temp config
        if ($oldconf) {
            PVE::Tools::file_set_contents($conffile, "memory: 128\n");
            $restore_cleanup_oldconf->($cfg, $vmid, $oldconf, $virtdev_hash);
        }

        # allocate volumes
        my $map = $restore_allocate_devices->($cfg, $virtdev_hash, $vmid);

        # print restore information to $fifofh
        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            my $storeid = $d->{storeid};
            my $volid = $d->{volid};

            my $map_opts = '';
            if (my $limit = $storage_limits{$storeid}) {
                $map_opts .= "throttling.bps=$limit:throttling.group=$storeid:";
            }

            my $write_zeros = 1;
            if (PVE::Storage::volume_has_feature($cfg, 'sparseinit', $volid)) {
                $write_zeros = 0;
            }

            my $path = PVE::Storage::path($cfg, $volid);

            print $fifofh "${map_opts}format=$d->{format}:${write_zeros}:$d->{devname}=$path\n";

            print "map '$d->{devname}' to '$path' (write zeros = ${write_zeros})\n";
        }

        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $fh->close();
    };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };
        local $SIG{ALRM} = sub { die "got timeout\n"; };

        $oldtimeout = alarm($timeout);

        my $parser = sub {
            my $line = shift;

            print "$line\n";

            if ($line =~ m/^DEV:\sdev_id=(\d+)\ssize:\s(\d+)\sdevname:\s(\S+)$/) {
                my ($dev_id, $size, $devname) = ($1, $2, $3);
                $devinfo->{$devname} = { size => $size, dev_id => $dev_id };
            } elsif ($line =~ m/^CTIME: /) {
                # we correctly received the vma config, so we can disable
                # the timeout now for disk allocation (set to 10 minutes, so
                # that we always timeout if something goes wrong)
                alarm(600);
                $print_devmap->();
                print $fifofh "done\n";
                # re-arm the alarm that was pending before, if there was one
                my $tmp = $oldtimeout || 0;
                $oldtimeout = undef;
                alarm($tmp);
                close($fifofh);
                $fifofh = undef;
            }
        };

        print "restore vma archive: $dbg_cmdstring\n";
        run_command($commands, input => $input, outfunc => $parser, afterfork => $openfifo);
    };
    my $err = $@;

    alarm($oldtimeout) if $oldtimeout;

    $restore_deactivate_volumes->($cfg, $devinfo);

    close($fifofh) if $fifofh;
    unlink $mapfifo;
    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($cfg, $devinfo);
        die $err;
    }

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $opts->{pool}) if $opts->{pool};
}
6908
# Restore a VM from a legacy tar based vzdump archive.
#
# $archive: path of the archive, or '-' to read it from STDIN
# $vmid:    ID of the VM to restore to
# $user:    user the restore is done for, exported as VZDUMP_USER
# $opts:    restore options; 'storage', 'pool', 'prealloc', 'info' and
#           'unique' are used
#
# The archive is extracted through '/usr/lib/qemu-server/qmextract', the
# volume mapping is then read back from 'qmrestore.stat' in the temporary
# directory and used to rewrite the VM config.
#
# On error the temporary volumes listed in the stat file are removed, the
# temporary directory is deleted and the error is re-thrown.
#
# NOTE(review): with $opts->{info} the 'return' below only leaves the eval,
# so an empty config still gets written afterwards - confirm whether this
# option is still in use.
sub restore_tar_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if ($archive ne '-') {
        my $firstfile = tar_archive_read_firstfile($archive);
        die "ERROR: file '$archive' does not look like a QemuServer vzdump backup\n"
            if $firstfile ne 'qemu-server.conf';
    }

    my $storecfg = PVE::Storage::config();

    # avoid zombie disks when restoring over an existing VM -> cleanup first
    # pass keep_empty_config=1 to keep the config (thus VMID) reserved for us
    # skiplock=1 because qmrestore has set the 'create' lock itself already
    my $vmcfgfn = PVE::QemuConfig->config_file($vmid);
    destroy_vm($storecfg, $vmid, 1, { lock => 'restore' }) if -f $vmcfgfn;

    my $tocmd = "/usr/lib/qemu-server/qmextract";

    $tocmd .= " --storage " . PVE::Tools::shellquote($opts->{storage}) if $opts->{storage};
    $tocmd .= " --pool " . PVE::Tools::shellquote($opts->{pool}) if $opts->{pool};
    $tocmd .= ' --prealloc' if $opts->{prealloc};
    $tocmd .= ' --info' if $opts->{info};

    # tar option "xf" does not autodetect compression when read from STDIN,
    # so we pipe to zcat
    my $cmd = "zcat -f|tar xf " . PVE::Tools::shellquote($archive) . " " .
        PVE::Tools::shellquote("--to-command=$tocmd");

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    mkpath $tmpdir;

    local $ENV{VZDUMP_TMPDIR} = $tmpdir;
    local $ENV{VZDUMP_VMID} = $vmid;
    local $ENV{VZDUMP_USER} = $user;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    my $new_conf_raw = '';

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        if ($archive eq '-') {
            print "extracting archive from STDIN\n";
            run_command($cmd, input => "<&STDIN");
        } else {
            print "extracting archive '$archive'\n";
            run_command($cmd);
        }

        return if $opts->{info};

        # read new mapping
        my $map = {};
        my $statfile = "$tmpdir/qmrestore.stat";
        if (my $fd = IO::File->new($statfile, "r")) {
            while (defined (my $line = <$fd>)) {
                if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                    $map->{$1} = $2 if $1;
                } else {
                    print STDERR "unable to parse line in statfile - $line\n";
                }
            }
            $fd->close();
        }

        my $confsrc = "$tmpdir/qemu-server.conf";

        my $srcfd = IO::File->new($confsrc, "r") || die "unable to open file '$confsrc'\n";

        my $cookie = { netcount => 0 };
        while (defined (my $line = <$srcfd>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $srcfd->close();
    };
    if (my $err = $@) {
        tar_restore_cleanup($storecfg, "$tmpdir/qmrestore.stat") if !$opts->{info};
        # the stat file was read above, so the temporary directory can go too
        rmtree $tmpdir;
        die $err;
    }

    rmtree $tmpdir;

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;
}
7016
# Call $func->($storage_id) once for every storage that holds a non-CD-ROM
# volume of $conf. Each storage is reported a single time, in sorted ID order.
# Volumes whose ID cannot be parsed into a storage ID are ignored.
sub foreach_storage_used_by_vm {
    my ($conf, $func) = @_;

    my %used_storage;

    my $collect = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive);

        # second argument set: an unparsable volume ID yields no storage ID
        my ($sid) = PVE::Storage::parse_volume_id($drive->{file}, 1);
        $used_storage{$sid} = $sid if $sid;
    };
    PVE::QemuConfig->foreach_volume($conf, $collect);

    for my $sid (sort keys %used_storage) {
        $func->($sid);
    }
}
7036
# storage types for which do_snapshots_with_qemu() answers "yes" (unless the
# storage has 'krbd' set)
my $qemu_snap_storage = {
    rbd => 1,
};

# Tell whether a snapshot of $volid (attached as $deviceid) is done via QEMU.
#
# Returns 1 if the volume's storage type is listed in $qemu_snap_storage and the
# storage does not have 'krbd' set, or if the volume ID ends in '.qcow2' or
# '.qed'. Returns nothing otherwise, and always for a tpmstate0 device.
# Dies if the storage is missing from $storecfg->{ids}.
sub do_snapshots_with_qemu {
    my ($storecfg, $volid, $deviceid) = @_;

    return if $deviceid =~ m/tpmstate0/;

    my $storeid = PVE::Storage::parse_volume_id($volid);
    my $scfg = $storecfg->{ids}->{$storeid};
    die "could not find storage '$storeid'\n" if !defined($scfg);

    my $type_uses_qemu = $qemu_snap_storage->{$scfg->{type}} && !$scfg->{krbd};
    return 1 if $type_uses_qemu;

    return 1 if $volid =~ m/\.(qcow2|qed)$/;

    return;
}
7059
# Probe the guest agent of VM $vmid with a 'guest-ping' (3 second timeout).
# Returns 1 if the ping succeeds and 0 otherwise; on failure a warning is
# emitted unless $nowarn is set.
sub qga_check_running {
    my ($vmid, $nowarn) = @_;

    eval { mon_cmd($vmid, "guest-ping", timeout => 3); };
    my $err = $@;

    return 1 if !$err;

    warn "Qemu Guest Agent is not running - $err" if !$nowarn;
    return 0;
}
7070
# Convert the volumes of VM $vmid into base volumes.
#
# Every non-CD-ROM volume of $conf (or only drive $disk, if given) whose storage
# reports the 'template' feature is passed to vdisk_create_base(); the returned
# volume ID replaces the old one in $conf, and the config is written out after
# each converted volume.
sub template_create {
    my ($vmid, $conf, $disk) = @_;

    my $storecfg = PVE::Storage::config();

    my $to_base_volume = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);
        return if $disk && $ds ne $disk;

        my $volid = $drive->{file};
        return if !PVE::Storage::volume_has_feature($storecfg, 'template', $volid);

        $drive->{file} = PVE::Storage::vdisk_create_base($storecfg, $volid);
        $conf->{$ds} = print_drive($drive);
        PVE::QemuConfig->write_config($vmid, $conf);
    };

    PVE::QemuConfig->foreach_volume($conf, $to_base_volume);
}
7091
# Translate an 'iscsi://<portal>/<target>/<lun>' path into the option string
# form ('file.driver=iscsi,...,driver=raw') that qemu_img_convert() passes along
# with --image-opts / --target-image-opts.
#
# The initiator name is taken from get_initiator_name().
# Dies if $path does not have the expected three-component iscsi:// form.
sub convert_iscsi_path {
    my ($path) = @_;

    if ($path =~ m|^iscsi://([^/]+)/([^/]+)/(.+)$|) {
        my ($portal, $target, $lun) = ($1, $2, $3);

        my $initiator_name = get_initiator_name();

        return "file.driver=iscsi,file.transport=tcp,file.initiator-name=$initiator_name,".
            "file.portal=$portal,file.target=$target,file.lun=$lun,driver=raw";
    }

    die "cannot convert iscsi path '$path', unknown format\n";
}
7108
# Copy a disk image with 'qemu-img convert'.
#
# Arguments:
#   $src_volid           - source volume ID, or the path of an existing file
#   $dst_volid           - destination volume ID (must parse as a volume ID)
#   $size                - only used to render the progress output
#   $snapname            - optional snapshot to read from
#   $is_zero_initialized - if true and the target is not iSCSI, the target path
#                          is prefixed with 'zeroinit:'
#
# The target is not created by qemu-img ('-n' is always passed).
# Dies if source or destination cannot be resolved, or with "copy failed: ..."
# if the qemu-img command fails.
sub qemu_img_convert {
    my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized) = @_;

    my $storecfg = PVE::Storage::config();
    my ($src_storeid, $src_volname) = PVE::Storage::parse_volume_id($src_volid, 1);
    my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid, 1);

    die "destination '$dst_volid' is not a valid volid for qemu-img convert\n" if !$dst_storeid;

    my $cachemode;
    my $src_path;
    my $src_is_iscsi = 0;
    my $src_format;

    if ($src_storeid) {
        # source is a storage volume
        PVE::Storage::activate_volumes($storecfg, [$src_volid], $snapname);
        my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
        $src_format = qemu_img_format($src_scfg, $src_volname);
        $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
        $src_is_iscsi = ($src_path =~ m|^iscsi://|);
        $cachemode = 'none' if $src_scfg->{type} eq 'zfspool';
    } elsif (-f $src_volid) {
        # source is a plain file; the format is only known if the extension tells us
        $src_path = $src_volid;
        if ($src_path =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/) {
            $src_format = $1;
        }
    }

    die "source '$src_volid' is not a valid volid nor path for qemu-img convert\n" if !$src_path;

    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
    my $dst_format = qemu_img_format($dst_scfg, $dst_volname);
    my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
    my $dst_is_iscsi = ($dst_path =~ m|^iscsi://|);

    my $cmd = [];
    push @$cmd, '/usr/bin/qemu-img', 'convert', '-p', '-n';
    push @$cmd, '-l', "snapshot.name=$snapname"
        if $snapname && $src_format && $src_format eq "qcow2";
    push @$cmd, '-t', 'none' if $dst_scfg->{type} eq 'zfspool';
    push @$cmd, '-T', $cachemode if defined($cachemode);

    if ($src_is_iscsi) {
        push @$cmd, '--image-opts';
        $src_path = convert_iscsi_path($src_path);
    } elsif ($src_format) {
        push @$cmd, '-f', $src_format;
    }

    if ($dst_is_iscsi) {
        push @$cmd, '--target-image-opts';
        $dst_path = convert_iscsi_path($dst_path);
    } else {
        push @$cmd, '-O', $dst_format;
    }

    push @$cmd, $src_path;

    if (!$dst_is_iscsi && $is_zero_initialized) {
        push @$cmd, "zeroinit:$dst_path";
    } else {
        push @$cmd, $dst_path;
    }

    # turn qemu-img's "(NN.NN/100%)" progress lines into a readable status line
    my $parser = sub {
        my $line = shift;
        if ($line =~ m/\((\S+)\/100\%\)/) {
            my $percent = $1;
            my $transferred = int($size * $percent / 100);
            my $total_h = render_bytes($size, 1);
            my $transferred_h = render_bytes($transferred, 1);

            print "transferred $transferred_h of $total_h ($percent%)\n";
        }
    };

    eval { run_command($cmd, timeout => undef, outfunc => $parser); };
    my $err = $@;
    die "copy failed: $err" if $err;
}
7190
# Determine the image format of $volname on the storage described by $scfg.
# Only storages with a 'path' can yield something other than "raw": there the
# file extension is used if it matches QEMU_FORMAT_RE. Everything else is "raw".
sub qemu_img_format {
    my ($scfg, $volname) = @_;

    return "raw" if !$scfg->{path};

    return $1 if $volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/;

    return "raw";
}
7200
# Start a 'drive-mirror' block job for "drive-$drive" of VM $vmid and hand over
# to qemu_drive_mirror_monitor().
#
# $dst_volid is either an 'nbd:' target (used verbatim, format "nbd") or a
# volume ID that is resolved to a path (prefixed with 'zeroinit:' if
# $is_zero_initialized is set). $jobs is the hash of tracked jobs; the new job
# is registered in it before the QMP command is sent. $bwlimit, if defined, is
# multiplied by 1024 and passed as 'speed'. $src_bitmap, if defined, switches
# the job to an incremental sync using that bitmap.
#
# If starting the job fails, all jobs in $jobs are cancelled and the function
# dies with "mirroring error: ...".
sub qemu_drive_mirror {
    my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit, $src_bitmap) = @_;

    $jobs ||= {};

    my $device = "drive-$drive";
    $jobs->{$device} = {};

    my ($qemu_target, $format);
    if ($dst_volid =~ /^nbd:/) {
        ($qemu_target, $format) = ($dst_volid, "nbd");
    } else {
        my $storecfg = PVE::Storage::config();
        my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid);
        my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

        $format = qemu_img_format($dst_scfg, $dst_volname);

        my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
        $qemu_target = $dst_path;
        $qemu_target = "zeroinit:$dst_path" if $is_zero_initialized;
    }

    my $opts = {
        timeout => 10,
        device => $device,
        mode => "existing",
        sync => "full",
        target => $qemu_target,
    };
    $opts->{format} = $format if $format;

    if (defined($src_bitmap)) {
        $opts->{sync} = 'incremental';
        $opts->{bitmap} = $src_bitmap;
        print "drive mirror re-using dirty bitmap '$src_bitmap'\n";
    }

    if (!defined($bwlimit)) {
        print "drive mirror is starting for $device\n";
    } else {
        $opts->{speed} = $bwlimit * 1024;
        print "drive mirror is starting for $device with bandwidth limit: ${bwlimit} KB/s\n";
    }

    # if a job already runs for this device we get an error, catch it for cleanup
    eval { mon_cmd($vmid, "drive-mirror", %$opts); };
    if (my $err = $@) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@;
        die "mirroring error: $err\n";
    }

    qemu_drive_mirror_monitor($vmid, $vmiddst, $jobs, $completion, $qga);
}
7252
# Poll the block jobs listed in $jobs (keyed by device ID) on VM $vmid once per
# second, print their progress and finish them according to $completion.
#
# $completion can be either
# 'complete': wait until all jobs are ready, block-job-complete them (default)
# 'cancel': wait until all jobs are ready, block-job-cancel them
# 'skip': wait until all jobs are ready, return with block jobs in ready state
# 'auto': wait until all jobs disappear, only use for jobs which complete automatically
#
# $op is the block job type to watch (default "mirror"); jobs of other types
# are ignored. If $vmiddst is set and differs from $vmid, the jobs are cancelled
# instead of completed once they are ready; while doing so the guest file
# systems are frozen (if $qga is set and the agent answers) or the VM is
# suspended.
#
# Entries are removed from $jobs as the jobs finish. On any error all remaining
# jobs are cancelled and the function dies with "block job ($op) error: ...".
sub qemu_drive_mirror_monitor {
    my ($vmid, $vmiddst, $jobs, $completion, $qga, $op) = @_;

    $completion //= 'complete';
    $op //= "mirror";

    eval {
        # counts failed completion attempts; gives up after more than 300
        my $err_complete = 0;

        my $starttime = time();
        while (1) {
            die "block job ('$op') timed out\n" if $err_complete > 300;

            my $stats = mon_cmd($vmid, "query-block-jobs");
            my $ctime = time();

            my $running_jobs = {};
            for my $stat (@$stats) {
                next if $stat->{type} ne $op;
                $running_jobs->{$stat->{device}} = $stat;
            }

            my $readycounter = 0;

            for my $job_id (sort keys %$jobs) {
                my $job = $running_jobs->{$job_id};

                my $vanished = !defined($job);
                my $complete = defined($jobs->{$job_id}->{complete}) && $vanished;
                if ($complete || ($vanished && $completion eq 'auto')) {
                    print "$job_id: $op-job finished\n";
                    delete $jobs->{$job_id};
                    next;
                }

                # a job we did not complete ourselves must not disappear
                die "$job_id: '$op' has been cancelled\n" if !defined($job);

                my $busy = $job->{busy};
                my $ready = $job->{ready};
                if (my $total = $job->{len}) {
                    my $transferred = $job->{offset} || 0;
                    my $percent = sprintf "%.2f", ($transferred * 100 / $total);

                    my $duration = $ctime - $starttime;
                    my $total_h = render_bytes($total, 1);
                    my $transferred_h = render_bytes($transferred, 1);

                    my $status = sprintf(
                        "transferred $transferred_h of $total_h ($percent%%) in %s",
                        render_duration($duration),
                    );

                    if ($ready) {
                        if ($busy) {
                            $status .= ", still busy"; # shouldn't even happen? but mirror is weird
                        } else {
                            $status .= ", ready";
                        }
                    }
                    # stop repeating the status line once the job was seen ready
                    print "$job_id: $status\n" if !$jobs->{$job_id}->{ready};
                    $jobs->{$job_id}->{ready} = $ready;
                }

                $readycounter++ if $job->{ready};
            }

            last if scalar(keys %$jobs) == 0;

            if ($readycounter == scalar(keys %$jobs)) {
                print "all '$op' jobs are ready\n";

                # do the complete later (or has already been done)
                last if $completion eq 'skip' || $completion eq 'auto';

                if ($vmiddst && $vmiddst != $vmid) {
                    my $agent_running = $qga && qga_check_running($vmid);
                    if ($agent_running) {
                        print "freeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-freeze"); };
                        warn $@ if $@;
                    } else {
                        print "suspend vm\n";
                        eval { PVE::QemuServer::vm_suspend($vmid, 1); };
                        warn $@ if $@;
                    }

                    # if we clone a disk for a new target vm, we don't switch the disk
                    # (errors are re-thrown only after the guest was thawed/resumed)
                    eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs); };
                    my $cancel_err = $@;

                    if ($agent_running) {
                        print "unfreeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-thaw"); };
                        warn $@ if $@;
                    } else {
                        print "resume vm\n";
                        eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
                        warn $@ if $@;
                    }

                    die $cancel_err if $cancel_err;

                    last;
                } else {

                    for my $job_id (sort keys %$jobs) {
                        # try to switch the disk if source and destination are on the same guest
                        print "$job_id: Completing block job_id...\n";

                        my $completion_command;
                        if ($completion eq 'complete') {
                            $completion_command = 'block-job-complete';
                        } elsif ($completion eq 'cancel') {
                            $completion_command = 'block-job-cancel';
                        } else {
                            die "invalid completion value: $completion\n";
                        }
                        eval { mon_cmd($vmid, $completion_command, device => $job_id) };
                        my $err = $@;
                        if ($err && $err =~ m/cannot be completed/) {
                            print "$job_id: block job cannot be completed, trying again.\n";
                            $err_complete++;
                        } elsif ($err) {
                            # any other failure is a real error, not a success
                            die "$job_id: block job cannot be completed - $err\n";
                        } else {
                            print "$job_id: Completed successfully.\n";
                            $jobs->{$job_id}->{complete} = 1;
                        }
                    }
                }
            }
            sleep 1;
        }
    };
    my $err = $@;

    if ($err) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        die "block job ($op) error: $err";
    }
}
7390
# Send 'block-job-cancel' for every job in $jobs (keyed by device ID) and then
# poll 'query-block-jobs' once per second until none of them is listed any
# more. Finished jobs are removed from $jobs. Errors of the cancel command
# itself are ignored; errors of the status query propagate.
sub qemu_blockjobs_cancel {
    my ($vmid, $jobs) = @_;

    for my $job_id (keys %$jobs) {
        print "$job_id: Cancelling block job\n";
        eval { mon_cmd($vmid, "block-job-cancel", device => $job_id); };
        $jobs->{$job_id}->{cancel} = 1;
    }

    while (1) {
        my $stats = mon_cmd($vmid, "query-block-jobs");

        my %still_running;
        $still_running{$_->{device}} = $_ for @$stats;

        for my $job_id (keys %$jobs) {
            next if !defined($jobs->{$job_id}->{cancel});
            next if defined($still_running{$job_id});

            print "$job_id: Done.\n";
            delete $jobs->{$job_id};
        }

        last if scalar(keys %$jobs) == 0;

        sleep 1;
    }
}
7421
# Clone one drive of VM $vmid for VM $newvmid.
#
# Without $full a linked clone is created via vdisk_clone(). With $full a new
# volume is allocated on $storage (or the source storage) and filled:
#   - cloud-init drives get an empty volume only; pending jobs in $jobs are
#     completed first if $completion is 'complete'
#   - for a stopped VM, or when cloning from $snapname, the data is copied
#     offline (qemu-img dd for efidisk0, qemu_img_convert() otherwise)
#   - for a running VM a drive-mirror job is started (not possible for tpmstate0)
#
# Every newly created volume ID is appended to @$newvollist.
# Returns $drive itself (the caller's hash is modified in place): 'file' points
# to the new volume, 'format' is reset and 'size' is updated if it can be read.
sub clone_disk {
    my ($storecfg, $vmid, $running, $drivename, $drive, $snapname,
        $newvmid, $storage, $format, $full, $newvollist, $jobs, $completion, $qga, $bwlimit, $conf) = @_;

    my $newvolid;

    if (!$full) {
        print "create linked clone of drive $drivename ($drive->{file})\n";
        $newvolid = PVE::Storage::vdisk_clone($storecfg, $drive->{file}, $newvmid, $snapname);
        push @$newvollist, $newvolid;
    } else {
        my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        $storeid = $storage if $storage;

        my $dst_format = resolve_dst_disk_format($storecfg, $storeid, $volname, $format);

        print "create full clone of drive $drivename ($drive->{file})\n";

        my $is_cloudinit = drive_is_cloudinit($drive);

        my ($name, $size);
        if ($is_cloudinit) {
            $name = "vm-$newvmid-cloudinit";
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $name .= ".$dst_format" if $scfg->{path};
            $snapname = undef;
            $size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE;
        } elsif ($drivename eq 'efidisk0') {
            $size = get_efivars_size($conf);
        } elsif ($drivename eq 'tpmstate0') {
            $size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
        } else {
            ($size) = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
        }

        $newvolid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $newvmid, $dst_format, $name, ($size/1024)
        );
        push @$newvollist, $newvolid;

        PVE::Storage::activate_volumes($storecfg, [$newvolid]);

        if ($is_cloudinit) {
            # no data is copied for cloud-init drives.
            # when cloning multiple disks (e.g. during clone_vm) it might be the last disk
            # if this is the case, we have to complete any block-jobs still there from
            # previous drive-mirrors
            if (($completion eq 'complete') && (scalar(keys %$jobs) > 0)) {
                qemu_drive_mirror_monitor($vmid, $newvmid, $jobs, $completion, $qga);
            }
        } else {
            my $sparseinit = PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $newvolid);

            if ($running && !$snapname) {
                die "cannot move TPM state while VM is running\n" if $drivename eq 'tpmstate0';

                my $kvmver = get_running_qemu_version($vmid);
                if (!min_version($kvmver, 2, 7) && $drive->{iothread}) {
                    die "drive-mirror with iothread requires qemu version 2.7 or higher\n";
                }

                qemu_drive_mirror($vmid, $drivename, $newvolid, $newvmid, $sparseinit, $jobs,
                    $completion, $qga, $bwlimit);
            } elsif ($drivename eq 'efidisk0') {
                # TODO: handle bwlimits
                # the relevant data on the efidisk may be smaller than the source
                # e.g. on RBD/ZFS, so we use dd to copy only the amount
                # that is given by the OVMF_VARS.fd
                my $src_path = PVE::Storage::path($storecfg, $drive->{file});
                my $dst_path = PVE::Storage::path($storecfg, $newvolid);

                # better for Ceph if block size is not too small, see bug #3324
                my $bs = 1024*1024;

                run_command(['qemu-img', 'dd', '-n', '-O', $dst_format, "bs=$bs", "osize=$size",
                    "if=$src_path", "of=$dst_path"]);
            } else {
                # TODO: handle bwlimits
                qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit);
            }
        }
    }

    # best effort: keep the previous size entry if the new size cannot be read
    my ($new_size) = eval { PVE::Storage::volume_size_info($storecfg, $newvolid, 10) };

    $drive->{format} = undef;
    $drive->{file} = $newvolid;
    $drive->{size} = $new_size if defined($new_size);

    return $drive;
}
7517
# Ask the running QEMU process of VM $vmid for its version ('query-version')
# and return it as "<major>.<minor>".
sub get_running_qemu_version {
    my ($vmid) = @_;

    my $version = mon_cmd($vmid, "query-version");
    my $qemu = $version->{qemu};

    return join('.', $qemu->{major}, $qemu->{minor});
}
7523
# Decide whether the old (non-EFI) PXE ROM files have to be used for the given
# machine type.
#
# Returns the list ($use_old_bios_files, $machine_type), where a trailing
# '.pxe' has been stripped from the machine type. Returns nothing if no
# machine type is given.
sub qemu_use_old_bios_files {
    my ($machine_type) = @_;

    return if !$machine_type;

    # an explicit '.pxe' suffix always selects the old files
    if ($machine_type =~ m/^(\S+)\.pxe$/) {
        return (1, $1);
    }

    # Note: kvm version < 2.4 use non-efi pxe files, and have problems when we
    # load new efi bios files on migration. So this hack is required to allow
    # live migration from qemu-2.2 to qemu-2.4, which is sometimes used when
    # upgrading from proxmox-ve-3.X to proxmox-ve 4.0
    my $version = extract_version($machine_type, kvm_user_version());
    my $use_old_bios_files = !min_version($version, 2, 4);

    return ($use_old_bios_files, $machine_type);
}
7545
# Return the size in bytes of the OVMF vars image that get_ovmf_files() selects
# for $conf (based on the VM architecture, the efidisk0 entry if present, and
# whether the machine type is q35). Dies if that image file does not exist.
sub get_efivars_size {
    my ($conf) = @_;

    my $arch = get_vm_arch($conf);

    my $efidisk;
    if ($conf->{efidisk0}) {
        $efidisk = parse_drive('efidisk0', $conf->{efidisk0});
    }

    my $smm = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    my (undef, $ovmf_vars) = get_ovmf_files($arch, $efidisk, $smm);
    die "uefi vars image '$ovmf_vars' not found\n" if ! -f $ovmf_vars;

    return -s $ovmf_vars;
}
7555
# Set the 'size' property of the efidisk0 entry in $conf to the value of
# get_efivars_size() and re-serialize the drive string in place.
# Does nothing if $conf has no efidisk0 entry. Returns nothing.
sub update_efidisk_size {
    my ($conf) = @_;

    if (defined($conf->{efidisk0})) {
        my $efidisk = PVE::QemuServer::parse_drive('efidisk0', $conf->{efidisk0});
        $efidisk->{size} = get_efivars_size($conf);
        $conf->{efidisk0} = print_drive($efidisk);
    }

    return;
}
7567
# Set the 'size' property of the tpmstate0 entry in $conf to
# PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE and re-serialize the drive string
# in place. Does nothing if $conf has no tpmstate0 entry (the same guard
# update_efidisk_size() applies for efidisk0).
sub update_tpmstate_size {
    my ($conf) = @_;

    # without this guard an absent entry would be handed to parse_drive() and
    # whatever comes back would be stored under $conf->{tpmstate0}
    return if !defined($conf->{tpmstate0});

    my $disk = PVE::QemuServer::parse_drive('tpmstate0', $conf->{tpmstate0});
    $disk->{size} = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE();
    $conf->{tpmstate0} = print_drive($disk);
}
7575
# Allocate a new EFI vars volume for VM $vmid on $storeid in format $fmt and
# fill it with the OVMF vars image that get_ovmf_files() selects for
# ($arch, $efidisk, $smm).
#
# Returns the list ($volid, $size), with $size being the size reported by
# volume_size_info() divided by 1024. Dies if the vars image does not exist.
sub create_efidisk($$$$$$$) {
    my ($storecfg, $storeid, $vmid, $fmt, $arch, $efidisk, $smm) = @_;

    my (undef, $ovmf_vars) = get_ovmf_files($arch, $efidisk, $smm);
    die "EFI vars default image not found\n" if ! -f $ovmf_vars;

    # the volume is allocated in kb, the copy progress is rendered from bytes
    my $vars_bytes = -s $ovmf_vars;
    my $vars_kb = PVE::Tools::convert_size($vars_bytes, 'b' => 'kb');

    my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $fmt, undef, $vars_kb);
    PVE::Storage::activate_volumes($storecfg, [$volid]);

    qemu_img_convert($ovmf_vars, $volid, $vars_bytes, undef, 0);

    my ($allocated_bytes) = PVE::Storage::volume_size_info($storecfg, $volid, 3);

    return ($volid, $allocated_bytes/1024);
}
7592
# Query the iothreads of VM $vmid ('query-iothreads') and return a hash ref
# that maps each iothread 'id' to its 'thread-id'.
sub vm_iothreads_list {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, 'query-iothreads');

    my %thread_id_of;
    for my $entry (@$res) {
        my $id = $entry->{id};
        $thread_id_of{$id} = $entry->{"thread-id"};
    }

    return \%thread_id_of;
}
7605
# Work out on which SCSI controller a drive lives.
#
# Returns the list ($maxdev, $controller, $controller_prefix):
#   $maxdev            - devices per controller: 7 if no 'scsihw' is configured
#                        or it starts with 'lsi', 1 for 'virtio-scsi-single',
#                        256 otherwise
#   $controller        - controller number, int($drive->{index} / $maxdev)
#   $controller_prefix - "virtioscsi" for 'virtio-scsi-single', else "scsihw"
sub scsihw_infos {
    my ($conf, $drive) = @_;

    my $scsihw = $conf->{scsihw};
    my $is_single = $scsihw && $scsihw eq 'virtio-scsi-single';

    my $maxdev = 256;
    if (!$scsihw || $scsihw =~ m/^lsi/) {
        $maxdev = 7;
    } elsif ($is_single) {
        $maxdev = 1;
    }

    my $controller = int($drive->{index} / $maxdev);
    my $controller_prefix = $is_single ? "virtioscsi" : "scsihw";

    return ($maxdev, $controller, $controller_prefix);
}
7626
# Map an 'ostype' value to a Windows major version number:
# 5 for wxp/w2k3/w2k, 6 for w2k8/wvista, N for 'winN', and 0 for everything
# else (including an unset ostype).
sub windows_version {
    my ($ostype) = @_;

    return 0 if !$ostype;

    my %version_of = (
        wxp => 5,
        w2k3 => 5,
        w2k => 5,
        w2k8 => 6,
        wvista => 6,
    );

    return $version_of{$ostype} if exists($version_of{$ostype});
    return $1 if $ostype =~ m/^win(\d+)$/;

    return 0;
}
7644
# Pick the image format for a new disk on storage $storeid.
#
# If $format is not given, the format derived from $src_volname is used as a
# hint; with neither, the storage's default format is returned. A format that
# is not among the storage's valid formats is replaced by the default format.
sub resolve_dst_disk_format {
    my ($storecfg, $storeid, $src_volname, $format) = @_;
    my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);

    if (!$format) {
        return $defFormat if !$src_volname;

        # if no target format is specified, use the source disk format as hint
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        $format = qemu_img_format($scfg, $src_volname);
    }

    # test if requested format is supported - else use default
    for my $valid (@$validFormats) {
        return $format if $valid eq $format;
    }

    return $defFormat;
}
7664
# NOTE: if this logic changes, please update docs & possibly gui logic
#
# Choose the storage for VM state files, in this order of preference:
#   1. 'vmstatestorage' from $conf, if set
#   2. a shared storage on which the VM has at least one disk
#   3. a non-shared storage on which the VM has at least one disk
#   4. the storage named 'local'
# Within 2. and 3. a storage with a 'path' (file based) replaces an earlier pick.
sub find_vmstate_storage {
    my ($conf, $storecfg) = @_;

    return $conf->{vmstatestorage} if $conf->{vmstatestorage};

    my %pick; # keys: 'shared' and 'local'

    foreach_storage_used_by_vm($conf, sub {
        my ($sid) = @_;
        my $scfg = PVE::Storage::storage_config($storecfg, $sid);
        my $kind = $scfg->{shared} ? 'shared' : 'local';
        $pick{$kind} = $sid if !$pick{$kind} || $scfg->{path}; # prefer file based storage
    });

    return $pick{shared} // $pick{local} // 'local';
}
7688
# Generate a new UUID with the UUID module and return its string form.
sub generate_uuid {
    my $binary;
    UUID::generate($binary);

    my $text;
    UUID::unparse($binary, $text);

    return $text;
}
7695
# Return a 'uuid=<new uuid>' string built from a freshly generated UUID.
sub generate_smbios1_uuid {
    my $uuid = generate_uuid();
    return "uuid=$uuid";
}
7699
# Send 'nbd-server-stop' to VM $vmid and pass on the result of the command.
sub nbd_stop {
    my ($vmid) = @_;

    return mon_cmd($vmid, 'nbd-server-stop');
}
7705
# Create the (empty) reboot trigger file /run/qemu-server/<vmid>.reboot.
# Dies if the file cannot be opened for writing.
sub create_reboot_request {
    my ($vmid) = @_;

    my $trigger = "/run/qemu-server/$vmid.reboot";

    open(my $fh, '>', $trigger)
        or die "failed to create reboot trigger file: $!\n";
    close($fh);
}
7712
# Remove the reboot trigger file /run/qemu-server/<vmid>.reboot.
# Returns the result of unlink (false if there was no such file).
# Dies if removal fails for any reason other than the file not existing.
sub clear_reboot_request {
    my ($vmid) = @_;

    my $path = "/run/qemu-server/$vmid.reboot";

    my $removed = unlink($path);
    if (!$removed && $! != POSIX::ENOENT) {
        die "could not remove reboot request for $vmid: $!";
    }

    return $removed;
}
7724
# Build a device => bootindex hash from a legacy boot string (one letter per
# device class: 'c' disk, 'd' cdrom, 'n' network).
#
# The letter at position N (1-based) gets the base index N*100; each device of
# that class takes the current index of its class, which is then increased by
# one. For class 'c' only the drive named in $conf->{bootdisk} is entered. If
# $bootcfg has no 'legacy' value, the default from $boot_fmt is used.
sub bootorder_from_legacy {
    my ($conf, $bootcfg) = @_;

    my $boot = $bootcfg->{legacy} || $boot_fmt->{legacy}->{default};

    my @letters = split(//, $boot);
    my %next_index;
    for my $pos (0 .. $#letters) {
        $next_index{$letters[$pos]} = ($pos + 1) * 100;
    }

    my $bootorder = {};

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        if (drive_is_cdrom($drive, 1)) {
            $bootorder->{$ds} = $next_index{d}++ if $next_index{d};
        } elsif ($next_index{c}) {
            my $is_bootdisk = $conf->{bootdisk} && $conf->{bootdisk} eq $ds;
            $bootorder->{$ds} = $next_index{c} if $is_bootdisk;
            $next_index{c}++;
        }
    });

    if ($next_index{n}) {
        for my $idx (0 .. $MAX_NETS - 1) {
            my $netname = "net$idx";
            next if !$conf->{$netname};
            $bootorder->{$netname} = $next_index{n}++;
        }
    }

    return $bootorder;
}
7764
# Generate default device list for 'boot: order=' property. Matches legacy
# default boot order, but with explicit device names. This is important, since
# the fallback for when neither 'order' nor the old format is specified relies
# on 'bootorder_from_legacy' above, and it would be confusing if this diverges.
#
# Returns an array ref holding, in this order and each only if present: the
# first hard disk, the first cdrom and the first configured network device.
sub get_default_bootdevices {
    my ($conf) = @_;

    my @devices;

    # harddisk first (0), then cdrom (1)
    for my $want_cdrom (0, 1) {
        my $first = PVE::QemuServer::Drive::resolve_first_disk($conf, $want_cdrom);
        push @devices, $first if $first;
    }

    # network: only the lowest-numbered configured device
    for my $idx (0 .. $MAX_NETS - 1) {
        my $netname = "net$idx";
        if ($conf->{$netname}) {
            push @devices, $netname;
            last;
        }
    }

    return \@devices;
}
7792
# Return a device => bootindex hash for $conf.
#
# Without a 'boot' property, or if it parses to a legacy specification, the
# result comes from bootorder_from_legacy(). With 'order=' the listed devices
# get consecutive indices starting at 100. Otherwise the hash is empty.
sub device_bootorder {
    my ($conf) = @_;

    return bootorder_from_legacy($conf) if !defined($conf->{boot});

    my $boot = parse_property_string($boot_fmt, $conf->{boot});

    if (!defined($boot) || $boot->{legacy}) {
        my $legacy_order = bootorder_from_legacy($conf, $boot);
        return $legacy_order;
    }

    my $bootorder = {};
    if ($boot->{order}) {
        my $index = 100; # start at 100 to allow user to insert devices before us with -args
        for my $dev (PVE::Tools::split_list($boot->{order})) {
            $bootorder->{$dev} = $index;
            $index++;
        }
    }

    return $bootorder;
}
7812
# Connect to the qmeventd socket and send the vzdump handshake for VM $vmid.
#
# The connection is attempted up to five times, 25ms apart, as long as the
# failure is EINTR or EAGAIN; any other error, or running out of attempts,
# is fatal. Returns the open socket handle, which the caller closes once the
# registration is no longer needed.
sub register_qmeventd_handle {
    my ($vmid) = @_;

    my $peer = "/var/run/qmeventd.sock";
    my $fh;
    my $count = 0;

    while (1) {
        $count++;
        $fh = IO::Socket::UNIX->new(Peer => $peer, Blocking => 0, Timeout => 1);
        last if $fh;

        my $retryable = ($! == EINTR || $! == EAGAIN);
        die "unable to connect to qmeventd socket (vmid: $vmid) - $!\n" if !$retryable;

        die "unable to connect to qmeventd socket (vmid: $vmid) - timeout after $count retries\n"
            if $count > 4;

        usleep(25000);
    }

    # send handshake to mark VM as backing up
    print $fh to_json({vzdump => {vmid => "$vmid"}});

    # return handle to be closed later when inhibit is no longer required
    return $fh;
}
7840
7841 # bash completion helper
7842
# Completion helper: return an array ref with the volume IDs of all backups
# whose format is 'vma.<compressor>'. If $cvalue starts with '<storage>:',
# only that storage is listed.
sub complete_backup_archives {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    # stays undef (= all storages) unless a storage prefix was typed already
    my ($storeid) = $cvalue =~ m/^([^:]+):/;

    my $data = PVE::Storage::template_list($cfg, $storeid, 'backup');

    my $res = [];
    for my $items (values %$data) {
        for my $item (@$items) {
            next if $item->{format} !~ m/^vma\.(${\PVE::Storage::Plugin::COMPRESSOR_RE})$/;
            next if !defined($item->{volid});
            push @$res, $item->{volid};
        }
    }

    return $res;
}
7866
# Return an array ref of VM IDs taken from vmstatus().
#   $running undefined - all VMs
#   $running true      - only running VMs that are not templates
#   $running false     - only non-running VMs that are not templates
my $complete_vmid_full = sub {
    my ($running) = @_;

    my $idlist = vmstatus();

    my $matches = sub {
        my ($d) = @_;

        return 1 if !defined($running);
        return 0 if $d->{template};

        my $is_running = $d->{status} eq 'running';
        return $running ? $is_running : !$is_running;
    };

    my $res = [];
    for my $id (keys %$idlist) {
        push @$res, $id if $matches->($idlist->{$id});
    }

    return $res;
};
7886
# Completion helper: IDs of all VMs.
sub complete_vmid {
    return $complete_vmid_full->();
}
7890
# Completion helper: IDs of all non-template VMs that are not running.
sub complete_vmid_stopped {
    return $complete_vmid_full->(0);
}
7894
# Completion helper: IDs of all non-template VMs that are running.
sub complete_vmid_running {
    return $complete_vmid_full->(1);
}
7898
# Completion helper: return an array ref with the IDs of all storages that
# pass storage_check_enabled() and have the 'images' content type.
sub complete_storage {
    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @usable = grep {
        PVE::Storage::storage_check_enabled($cfg, $_, undef, 1)
            && $ids->{$_}->{content}->{images}
    } keys %$ids;

    return \@usable;
}
7913
# Completion helper: like complete_storage(), but storage_check_enabled() is
# asked about the target node, which is taken from the second element of
# $all_args.
sub complete_migration_storage {
    my ($cmd, $param, $current_value, $all_args) = @_;

    my $targetnode = $all_args->[1];

    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @usable = grep {
        PVE::Storage::storage_check_enabled($cfg, $_, $targetnode, 1)
            && $ids->{$_}->{content}->{images}
    } keys %$ids;

    return \@usable;
}
7931
# Return true if 'query-status' reports VM $vmid as "paused".
# If the config check or the QMP query fails, the error is emitted as a
# warning and a false value is returned.
sub vm_is_paused {
    my ($vmid) = @_;

    my $qmpstatus = eval {
        PVE::QemuConfig::assert_config_exists_on_node($vmid);
        mon_cmd($vmid, "query-status");
    };
    if (my $err = $@) {
        warn "$err\n";
    }

    return $qmpstatus if !$qmpstatus;
    return $qmpstatus->{status} eq "paused";
}
7941
# Verify that the storage of volume $vol allows the volume's content type.
# Returns 1 on success, dies otherwise.
sub check_volume_storage_type {
    my ($storecfg, $vol) = @_;

    my ($storeid) = PVE::Storage::parse_volume_id($vol);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
    my ($vtype) = PVE::Storage::parse_volname($storecfg, $vol);

    return 1 if $scfg->{content}->{$vtype};

    die "storage '$storeid' does not support content-type '$vtype'\n";
}
7954
7955 1;